/* $NetBSD: privcmd.c,v 1.66 2022/09/01 15:32:16 bouyer Exp $ */

/*-
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.66 2022/09/01 15:32:16 bouyer Exp $");

#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/stat.h>
#include <sys/proc.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/kernfs/kernfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_fault.h>
#include <uvm/uvm_fault_i.h>

#include <xen/kernfs_machdep.h>
#include <xen/hypervisor.h>
#include <xen/xen.h>
#include <xen/xenio.h>
#include <xen/xenmem.h>
#include <xen/xenpmap.h>
#include <xen/granttables.h>

#define PRIVCMD_MODE	(S_IRUSR)

/* Magic value is used to mark invalid pages.
 * This must be a value within the page-offset.
 * Page-aligned values including 0x0 are used by the guest.
 */
#define INVALID_PAGE	0xfff

typedef enum _privcmd_type {
	PTYPE_PRIVCMD,
	PTYPE_PRIVCMD_PHYSMAP,
	PTYPE_GNTDEV_REF,
	PTYPE_GNTDEV_ALLOC
} privcmd_type;

struct privcmd_object_privcmd {
	paddr_t base_paddr;	/* base address of physical space */
	paddr_t *maddr;		/* array of machine address to map */
	int domid;
	bool no_translate;
};

struct privcmd_object_gntref {
	paddr_t base_paddr;	/* base address of physical space */
	struct ioctl_gntdev_grant_notify notify;
	struct gnttab_map_grant_ref ops[1]; /* variable length */
};

struct privcmd_object_gntalloc {
	vaddr_t gntva;		/* granted area mapped in kernel */
	uint16_t domid;
	uint16_t flags;
	struct ioctl_gntdev_grant_notify notify;
	uint32_t gref_ids[1]; /* variable length */
};

struct privcmd_object {
	struct uvm_object uobj;
	privcmd_type type;
	int npages;
	union {
		struct privcmd_object_privcmd pc;
		struct privcmd_object_gntref gr;
		struct privcmd_object_gntalloc ga;
	} u;
};

#define PGO_GNTREF_LEN(count) \
	(sizeof(struct privcmd_object) + \
	    sizeof(struct gnttab_map_grant_ref) * ((count) - 1))

#define PGO_GNTA_LEN(count) \
	(sizeof(struct privcmd_object) + \
	    sizeof(uint32_t) * ((count) - 1))
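
/*
 * Worked example (informative only, not used by the code): the gntref and
 * gntalloc variants above end in a one-element array, so the PGO_*_LEN()
 * macros subtract one element from the requested count.  For a request of
 * 4 grant references, PGO_GNTREF_LEN(4) evaluates to
 *	sizeof(struct privcmd_object) + 3 * sizeof(struct gnttab_map_grant_ref)
 * which is the size passed to kmem_alloc() in privcmd_map_gref() below.
 */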

int privcmd_nobjects = 0;

static void privpgop_reference(struct uvm_object *);
static void privpgop_detach(struct uvm_object *);
static int privpgop_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **,
    int, int, vm_prot_t, int);
static int privcmd_map_obj(struct vm_map *, vaddr_t,
    struct privcmd_object *, vm_prot_t);


static int
privcmd_xen2bsd_errno(int error)
{
	/*
	 * Xen uses System V error codes.
	 * In order to keep bloat as minimal as possible,
	 * only convert what really impacts us.
	 */

	switch(-error) {
	case 0:
		return 0;
	case 1:
		return EPERM;
	case 2:
		return ENOENT;
	case 3:
		return ESRCH;
	case 4:
		return EINTR;
	case 5:
		return EIO;
	case 6:
		return ENXIO;
	case 7:
		return E2BIG;
	case 8:
		return ENOEXEC;
	case 9:
		return EBADF;
	case 10:
		return ECHILD;
	case 11:
		return EAGAIN;
	case 12:
		return ENOMEM;
	case 13:
		return EACCES;
	case 14:
		return EFAULT;
	case 15:
		return ENOTBLK;
	case 16:
		return EBUSY;
	case 17:
		return EEXIST;
	case 18:
		return EXDEV;
	case 19:
		return ENODEV;
	case 20:
		return ENOTDIR;
	case 21:
		return EISDIR;
	case 22:
		return EINVAL;
	case 23:
		return ENFILE;
	case 24:
		return EMFILE;
	case 25:
		return ENOTTY;
	case 26:
		return ETXTBSY;
	case 27:
		return EFBIG;
	case 28:
		return ENOSPC;
	case 29:
		return ESPIPE;
	case 30:
		return EROFS;
	case 31:
		return EMLINK;
	case 32:
		return EPIPE;
	case 33:
		return EDOM;
	case 34:
		return ERANGE;
	case 35:
		return EDEADLK;
	case 36:
		return ENAMETOOLONG;
	case 37:
		return ENOLCK;
	case 38:
		return ENOSYS;
	case 39:
		return ENOTEMPTY;
	case 40:
		return ELOOP;
	case 42:
		return ENOMSG;
	case 43:
		return EIDRM;
	case 60:
		return ENOSTR;
	case 61:
		return ENODATA;
	case 62:
		return ETIME;
	case 63:
		return ENOSR;
	case 66:
		return EREMOTE;
	case 74:
		return EBADMSG;
	case 75:
		return EOVERFLOW;
	case 84:
		return EILSEQ;
	case 87:
		return EUSERS;
	case 88:
		return ENOTSOCK;
	case 89:
		return EDESTADDRREQ;
	case 90:
		return EMSGSIZE;
	case 91:
		return EPROTOTYPE;
	case 92:
		return ENOPROTOOPT;
	case 93:
		return EPROTONOSUPPORT;
	case 94:
		return ESOCKTNOSUPPORT;
	case 95:
		return EOPNOTSUPP;
	case 96:
		return EPFNOSUPPORT;
	case 97:
		return EAFNOSUPPORT;
	case 98:
		return EADDRINUSE;
	case 99:
		return EADDRNOTAVAIL;
	case 100:
		return ENETDOWN;
	case 101:
		return ENETUNREACH;
	case 102:
		return ENETRESET;
	case 103:
		return ECONNABORTED;
	case 104:
		return ECONNRESET;
	case 105:
		return ENOBUFS;
	case 106:
		return EISCONN;
	case 107:
		return ENOTCONN;
	case 108:
		return ESHUTDOWN;
	case 109:
		return ETOOMANYREFS;
	case 110:
		return ETIMEDOUT;
	case 111:
		return ECONNREFUSED;
	case 112:
		return EHOSTDOWN;
	case 113:
		return EHOSTUNREACH;
	case 114:
		return EALREADY;
	case 115:
		return EINPROGRESS;
	case 116:
		return ESTALE;
	case 122:
		return EDQUOT;
	default:
		printf("unknown xen error code %d\n", -error);
		return -error;
	}
}
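
/*
 * Informative example (not exercised here directly): a failing hypercall
 * hands back a negative Linux-style errno, e.g. -2 for Xen's ENOENT (see
 * the retry loop in privcmd_mmapbatch_v2() below); privcmd_xen2bsd_errno(-2)
 * converts that to the native ENOENT before it is returned to userland.
 */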

static vm_prot_t
privcmd_get_map_prot(struct vm_map *map, vaddr_t start, off_t size)
{
	vm_prot_t prot;

	vm_map_lock_read(map);
	/* get protections. This also checks for validity of the mapping */
	if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE))
		prot = VM_PROT_READ | VM_PROT_WRITE;
	else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ))
		prot = VM_PROT_READ;
	else {
		printf("privcmd_get_map_prot 0x%lx -> 0x%lx "
		    "failed\n",
		    start, (unsigned long)(start + size - 1));
		prot = UVM_PROT_NONE;
	}
	vm_map_unlock_read(map);
	return prot;
}

static int
privcmd_mmap(struct vop_ioctl_args *ap)
{
#ifndef XENPV
	printf("IOCTL_PRIVCMD_MMAP not supported\n");
	return EINVAL;
#else
	int i, j;
	privcmd_mmap_t *mcmd = ap->a_data;
	privcmd_mmap_entry_t mentry;
	vaddr_t va;
	paddr_t ma;
	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
	paddr_t *maddr;
	struct privcmd_object *obj;
	vm_prot_t prot;
	int error;

	for (i = 0; i < mcmd->num; i++) {
		error = copyin(&mcmd->entry[i], &mentry, sizeof(mentry));
		if (error)
			return EINVAL;
		if (mentry.npages == 0)
			return EINVAL;
		if (mentry.va > VM_MAXUSER_ADDRESS)
			return EINVAL;
		va = mentry.va & ~PAGE_MASK;
		prot = privcmd_get_map_prot(vmm, va, mentry.npages * PAGE_SIZE);
		if (prot == UVM_PROT_NONE)
			return EINVAL;
		maddr = kmem_alloc(sizeof(paddr_t) * mentry.npages,
		    KM_SLEEP);
		ma = ((paddr_t)mentry.mfn) << PGSHIFT;
		for (j = 0; j < mentry.npages; j++) {
			maddr[j] = ma;
			ma += PAGE_SIZE;
		}
		obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
		obj->type = PTYPE_PRIVCMD;
		obj->u.pc.maddr = maddr;
		obj->u.pc.no_translate = false;
		obj->npages = mentry.npages;
		obj->u.pc.domid = mcmd->dom;
		error = privcmd_map_obj(vmm, va, obj, prot);
		if (error)
			return error;
	}
	return 0;
#endif
}

static int
privcmd_mmapbatch(struct vop_ioctl_args *ap)
{
#ifndef XENPV
	printf("IOCTL_PRIVCMD_MMAPBATCH not supported\n");
	return EINVAL;
#else
	int i;
	privcmd_mmapbatch_t* pmb = ap->a_data;
	vaddr_t va0;
	u_long mfn;
	paddr_t ma;
	struct vm_map *vmm;
	vaddr_t trymap;
	paddr_t *maddr;
	struct privcmd_object *obj;
	vm_prot_t prot;
	int error;

	vmm = &curlwp->l_proc->p_vmspace->vm_map;
	va0 = pmb->addr & ~PAGE_MASK;

	if (pmb->num == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

	maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
	/* get a page of KVA to check mappings */
	trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
	    UVM_KMF_VAONLY);
	if (trymap == 0) {
		kmem_free(maddr, sizeof(paddr_t) * pmb->num);
		return ENOMEM;
	}

	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
	obj->type = PTYPE_PRIVCMD;
	obj->u.pc.maddr = maddr;
	obj->u.pc.no_translate = false;
	obj->npages = pmb->num;
	obj->u.pc.domid = pmb->dom;

	for(i = 0; i < pmb->num; ++i) {
		error = copyin(&pmb->arr[i], &mfn, sizeof(mfn));
		if (error != 0) {
			/* XXX: mappings */
			pmap_update(pmap_kernel());
			kmem_free(maddr, sizeof(paddr_t) * pmb->num);
			uvm_km_free(kernel_map, trymap, PAGE_SIZE,
			    UVM_KMF_VAONLY);
			return error;
		}
		ma = ((paddr_t)mfn) << PGSHIFT;
		if ((error = pmap_enter_ma(pmap_kernel(), trymap, ma, 0,
		    prot, PMAP_CANFAIL | prot, pmb->dom))) {
			mfn |= 0xF0000000;
			copyout(&mfn, &pmb->arr[i], sizeof(mfn));
			maddr[i] = INVALID_PAGE;
		} else {
			pmap_remove(pmap_kernel(), trymap,
			    trymap + PAGE_SIZE);
			maddr[i] = ma;
		}
	}
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, trymap, PAGE_SIZE, UVM_KMF_VAONLY);

	error = privcmd_map_obj(vmm, va0, obj, prot);

	return error;
#endif
}

static int
privcmd_mmapbatch_v2(struct vop_ioctl_args *ap)
{
	int i;
	privcmd_mmapbatch_v2_t* pmb = ap->a_data;
	vaddr_t va0;
	u_long mfn;
	struct vm_map *vmm;
	paddr_t *maddr;
	struct privcmd_object *obj;
	vm_prot_t prot;
	int error;
	paddr_t base_paddr = 0;

	vmm = &curlwp->l_proc->p_vmspace->vm_map;
	va0 = pmb->addr & ~PAGE_MASK;

	if (pmb->num == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

#ifndef XENPV
	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
	base_paddr = xenmem_alloc_pa(pmb->num * PAGE_SIZE, PAGE_SIZE, true);
	KASSERT(base_paddr != 0);
#endif
	maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
	obj->type = PTYPE_PRIVCMD_PHYSMAP;
	obj->u.pc.maddr = maddr;
	obj->u.pc.base_paddr = base_paddr;
	obj->u.pc.no_translate = false;
	obj->npages = pmb->num;
	obj->u.pc.domid = pmb->dom;

	for(i = 0; i < pmb->num; ++i) {
		error = copyin(&pmb->arr[i], &mfn, sizeof(mfn));
		if (error != 0) {
			kmem_free(maddr, sizeof(paddr_t) * pmb->num);
			kmem_free(obj, sizeof(*obj));
#ifndef XENPV
			xenmem_free_pa(base_paddr, pmb->num * PAGE_SIZE);
#endif
			return error;
		}
#ifdef XENPV
		maddr[i] = ((paddr_t)mfn) << PGSHIFT;
#else
		maddr[i] = mfn; /* TMP argument for XENMEM_add_to_physmap */
#endif

	}
	error = privcmd_map_obj(vmm, va0, obj, prot);
	if (error)
		return error;

	/*
	 * map the range in user process now.
	 * If Xen returns -ENOENT, retry (paging in progress)
	 */
	for(i = 0; i < pmb->num; i++, va0 += PAGE_SIZE) {
		int err, cerr;
#ifdef XENPV
		for (int j = 0 ; j < 10; j++) {
			err = pmap_enter_ma(vmm->pmap, va0, maddr[i], 0,
			    prot, PMAP_CANFAIL | prot,
			    pmb->dom);
			if (err != -2) /* Xen ENOENT */
				break;
			if (kpause("xnoent", 1, mstohz(100), NULL))
				break;
		}
		if (err) {
			maddr[i] = INVALID_PAGE;
		}
#else /* XENPV */
		xen_add_to_physmap_batch_t add;
		u_long idx;
		xen_pfn_t gpfn;
		int err2;
		memset(&add, 0, sizeof(add));

		add.domid = DOMID_SELF;
		add.space = XENMAPSPACE_gmfn_foreign;
		add.size = 1;
		add.foreign_domid = pmb->dom;
		idx = maddr[i];
		set_xen_guest_handle(add.idxs, &idx);
		maddr[i] = INVALID_PAGE;
		gpfn = (base_paddr >> PGSHIFT) + i;
		set_xen_guest_handle(add.gpfns, &gpfn);
		err2 = 0;
		set_xen_guest_handle(add.errs, &err2);
		err = HYPERVISOR_memory_op(XENMEM_add_to_physmap_batch, &add);
		if (err < 0) {
			printf("privcmd_mmapbatch_v2: XENMEM_add_to_physmap_batch failed %d\n", err);
			privpgop_detach(&obj->uobj);
			return privcmd_xen2bsd_errno(err);
		}
		err = err2;
		if (err == 0)
			maddr[i] = base_paddr + i * PAGE_SIZE;
#endif /* XENPV */

		cerr = copyout(&err, &pmb->err[i], sizeof(pmb->err[i]));
		if (cerr) {
			privpgop_detach(&obj->uobj);
			return cerr;
		}
	}
	return 0;
}
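
/*
 * Illustrative userland sketch (an assumption, not part of this driver):
 * how a privileged tool might call IOCTL_PRIVCMD_MMAPBATCH_V2 to map frames
 * of a foreign domain.  The helper name, the kernfs path and the exact field
 * types are illustrative; libxenforeignmemory is the real consumer.  The
 * caller first creates a placeholder anonymous mapping; privcmd_map_obj()
 * then replaces it (UVM_FLAG_FIXED | UVM_FLAG_UNMAP) with the privcmd object.
 *
 *	int fd = open("/kern/xen/privcmd", O_RDWR);
 *
 *	static void *
 *	map_foreign(int fd, domid_t dom, u_long *gpfns, int *errs, size_t num)
 *	{
 *		privcmd_mmapbatch_v2_t pmb;
 *		void *va = mmap(NULL, num * PAGE_SIZE, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_SHARED, -1, 0);
 *
 *		if (va == MAP_FAILED)
 *			return NULL;
 *		pmb.num = num;
 *		pmb.dom = dom;
 *		pmb.addr = (unsigned long)va;
 *		pmb.arr = gpfns;
 *		pmb.err = errs;
 *		if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &pmb) == -1) {
 *			munmap(va, num * PAGE_SIZE);
 *			return NULL;
 *		}
 *		return va;
 *	}
 *
 * On success va is backed by the foreign pages and the per-page status is
 * left in errs[].
 */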

static int
privcmd_mmap_resource(struct vop_ioctl_args *ap)
{
	int i;
	privcmd_mmap_resource_t* pmr = ap->a_data;
	vaddr_t va0;
	struct vm_map *vmm;
	struct privcmd_object *obj;
	vm_prot_t prot;
	int error;
	struct xen_mem_acquire_resource op;
	xen_pfn_t *pfns;
	paddr_t *maddr;
	paddr_t base_paddr = 0;

	vmm = &curlwp->l_proc->p_vmspace->vm_map;
	va0 = pmr->addr & ~PAGE_MASK;

	if (pmr->num == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmr->num)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

	pfns = kmem_alloc(sizeof(xen_pfn_t) * pmr->num, KM_SLEEP);
#ifndef XENPV
	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
	base_paddr = xenmem_alloc_pa(pmr->num * PAGE_SIZE, PAGE_SIZE, true);
	KASSERT(base_paddr != 0);
	for (i = 0; i < pmr->num; i++) {
		pfns[i] = (base_paddr >> PGSHIFT) + i;
	}
#else
	KASSERT(!xen_feature(XENFEAT_auto_translated_physmap));
#endif

	memset(&op, 0, sizeof(op));
	op.domid = pmr->dom;
	op.type = pmr->type;
	op.id = pmr->id;
	op.frame = pmr->idx;
	op.nr_frames = pmr->num;
	set_xen_guest_handle(op.frame_list, pfns);

	error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &op);
	if (error) {
		printf("%s: XENMEM_acquire_resource failed: %d\n",
		    __func__, error);
		return privcmd_xen2bsd_errno(error);
	}
	maddr = kmem_alloc(sizeof(paddr_t) * pmr->num, KM_SLEEP);
	for (i = 0; i < pmr->num; i++) {
		maddr[i] = pfns[i] << PGSHIFT;
	}
	kmem_free(pfns, sizeof(xen_pfn_t) * pmr->num);

	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
	obj->type = PTYPE_PRIVCMD_PHYSMAP;
	obj->u.pc.base_paddr = base_paddr;
	obj->u.pc.maddr = maddr;
	obj->u.pc.no_translate = true;
	obj->npages = pmr->num;
	obj->u.pc.domid = (op.flags & XENMEM_rsrc_acq_caller_owned) ?
	    DOMID_SELF : pmr->dom;

	error = privcmd_map_obj(vmm, va0, obj, prot);
	return error;
}

static int
privcmd_map_gref(struct vop_ioctl_args *ap)
{
	struct ioctl_gntdev_mmap_grant_ref *mgr = ap->a_data;
	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
	struct privcmd_object *obj;
	vaddr_t va0 = (vaddr_t)mgr->va & ~PAGE_MASK;
	vm_prot_t prot;
	int error;

	if (mgr->count == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mgr->count)
		return EINVAL;
	if (mgr->notify.offset < 0 || mgr->notify.offset > mgr->count)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

	obj = kmem_alloc(PGO_GNTREF_LEN(mgr->count), KM_SLEEP);

	obj->type = PTYPE_GNTDEV_REF;
	obj->npages = mgr->count;
	memcpy(&obj->u.gr.notify, &mgr->notify,
	    sizeof(obj->u.gr.notify));
#ifndef XENPV
	KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
	obj->u.gr.base_paddr = xenmem_alloc_pa(obj->npages * PAGE_SIZE,
	    PAGE_SIZE, true);
	KASSERT(obj->u.gr.base_paddr != 0);
#else
	obj->u.gr.base_paddr = 0;
#endif /* !XENPV */

	for (int i = 0; i < obj->npages; ++i) {
		struct ioctl_gntdev_grant_ref gref;
		error = copyin(&mgr->refs[i], &gref, sizeof(gref));
		if (error != 0) {
			goto err1;
		}
#ifdef XENPV
		obj->u.gr.ops[i].host_addr = 0;
		obj->u.gr.ops[i].flags = GNTMAP_host_map |
		    GNTMAP_application_map | GNTMAP_contains_pte;
#else /* XENPV */
		obj->u.gr.ops[i].host_addr =
		    obj->u.gr.base_paddr + PAGE_SIZE * i;
		obj->u.gr.ops[i].flags = GNTMAP_host_map;
#endif /* XENPV */
		obj->u.gr.ops[i].dev_bus_addr = 0;
		obj->u.gr.ops[i].ref = gref.ref;
		obj->u.gr.ops[i].dom = gref.domid;
		obj->u.gr.ops[i].handle = -1;
		if (prot == UVM_PROT_READ)
			obj->u.gr.ops[i].flags |= GNTMAP_readonly;
	}
	error = privcmd_map_obj(vmm, va0, obj, prot);
	return error;
err1:
#ifndef XENPV
	xenmem_free_pa(obj->u.gr.base_paddr, obj->npages * PAGE_SIZE);
#endif
	kmem_free(obj, PGO_GNTREF_LEN(obj->npages));
	return error;
}
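
/*
 * Illustrative userland sketch (an assumption, not part of this driver):
 * mapping two grant references exported by domain "dom" through
 * IOCTL_GNTDEV_MMAP_GRANT_REF.  Field types are per <xen/xenio.h>; as with
 * the privcmd ioctls, the caller supplies a placeholder anonymous mapping
 * that the driver replaces.
 *
 *	struct ioctl_gntdev_grant_ref refs[2] = {
 *		{ .ref = ref0, .domid = dom },
 *		{ .ref = ref1, .domid = dom },
 *	};
 *	void *va = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_SHARED, -1, 0);
 *	struct ioctl_gntdev_mmap_grant_ref mgr;
 *
 *	memset(&mgr, 0, sizeof(mgr));
 *	mgr.va = va;
 *	mgr.count = 2;
 *	mgr.refs = refs;
 *	ioctl(fd, IOCTL_GNTDEV_MMAP_GRANT_REF, &mgr);
 *
 * On success the two granted pages are accessible at va; they are torn down
 * (and any requested notify processed) when the object is detached.
 */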

static int
privcmd_alloc_gref(struct vop_ioctl_args *ap)
{
	struct ioctl_gntdev_alloc_grant_ref *mga = ap->a_data;
	struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
	struct privcmd_object *obj;
	vaddr_t va0 = (vaddr_t)mga->va & ~PAGE_MASK;
	vm_prot_t prot;
	int error, ret;

	if (mga->count == 0)
		return EINVAL;
	if (va0 > VM_MAXUSER_ADDRESS)
		return EINVAL;
	if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mga->count)
		return EINVAL;
	if (mga->notify.offset < 0 || mga->notify.offset > mga->count)
		return EINVAL;

	prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE);
	if (prot == UVM_PROT_NONE)
		return EINVAL;

	obj = kmem_alloc(PGO_GNTA_LEN(mga->count), KM_SLEEP);

	obj->type = PTYPE_GNTDEV_ALLOC;
	obj->npages = mga->count;
	obj->u.ga.domid = mga->domid;
	memcpy(&obj->u.ga.notify, &mga->notify,
	    sizeof(obj->u.ga.notify));
	obj->u.ga.gntva = uvm_km_alloc(kernel_map,
	    PAGE_SIZE * obj->npages, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
	if (obj->u.ga.gntva == 0) {
		error = ENOMEM;
		goto err1;
	}

	for (int i = 0; i < obj->npages; ++i) {
		paddr_t ma;
		vaddr_t va = obj->u.ga.gntva + i * PAGE_SIZE;
		grant_ref_t id;
		bool ro = ((mga->flags &
		    GNTDEV_ALLOC_FLAG_WRITABLE) == 0);
		(void)pmap_extract_ma(pmap_kernel(), va, &ma);
		if ((ret = xengnt_grant_access(mga->domid, ma, ro, &id)) != 0) {
			printf("%s: xengnt_grant_access failed: %d\n",
			    __func__, ret);
			for (int j = 0; j < i; j++) {
				xengnt_revoke_access(obj->u.ga.gref_ids[j]);
			}
			error = ret;
			goto err2;
		}
		obj->u.ga.gref_ids[i] = id;
	}

	error = copyout(&obj->u.ga.gref_ids[0], mga->gref_ids,
	    sizeof(uint32_t) * obj->npages);
	if (error) {
		for (int i = 0; i < obj->npages; ++i) {
			xengnt_revoke_access(obj->u.ga.gref_ids[i]);
		}
		goto err2;
	}

	error = privcmd_map_obj(vmm, va0, obj, prot);
	return error;

err2:
	uvm_km_free(kernel_map, obj->u.ga.gntva,
	    PAGE_SIZE * obj->npages, UVM_KMF_WIRED);
err1:
	kmem_free(obj, PGO_GNTA_LEN(obj->npages));
	return error;
}

static int
privcmd_ioctl(void *v)
{
	struct vop_ioctl_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		u_long a_command;
		void *a_data;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error = 0;

	switch (ap->a_command) {
	case IOCTL_PRIVCMD_HYPERCALL:
	case IOCTL_PRIVCMD_HYPERCALL_OLD:
		/*
		 * oprivcmd_hypercall_t is privcmd_hypercall_t without the
		 * last entry
		 */
	{
		privcmd_hypercall_t *hc = ap->a_data;
		if (hc->op >= (PAGE_SIZE >> 5))
			return EINVAL;
		error = -EOPNOTSUPP;
#if defined(__i386__)
		__asm volatile (
			"pushl %%ebx; pushl %%ecx; pushl %%edx;"
			"pushl %%esi; pushl %%edi; "
			"movl 4(%%eax),%%ebx ;"
			"movl 8(%%eax),%%ecx ;"
			"movl 12(%%eax),%%edx ;"
			"movl 16(%%eax),%%esi ;"
			"movl 20(%%eax),%%edi ;"
			"movl (%%eax),%%eax ;"
			"shll $5,%%eax ;"
			"addl $hypercall_page,%%eax ;"
			"call *%%eax ;"
			"popl %%edi; popl %%esi; popl %%edx;"
			"popl %%ecx; popl %%ebx"
			: "=a" (error) : "0" (ap->a_data) : "memory" );
#endif /* __i386__ */
#if defined(__x86_64__)
#ifndef XENPV
		/* hypervisor can't access user memory if SMAP is enabled */
		smap_disable();
#endif
		{
			long i1, i2, i3;
			__asm volatile (
				"movq %8,%%r10; movq %9,%%r8;"
				"shll $5,%%eax ;"
				"addq $hypercall_page,%%rax ;"
				"call *%%rax"
				: "=a" (error), "=D" (i1),
				  "=S" (i2), "=d" (i3)
				: "0" ((unsigned int)hc->op),
				  "1" (hc->arg[0]),
				  "2" (hc->arg[1]),
				  "3" (hc->arg[2]),
				  "g" (hc->arg[3]),
				  "g" (hc->arg[4])
				: "r8", "r10", "memory" );
		}
#ifndef XENPV
		smap_enable();
#endif
#endif /* __x86_64__ */
		if (ap->a_command == IOCTL_PRIVCMD_HYPERCALL) {
			if (error >= 0) {
				hc->retval = error;
				error = 0;
			} else {
				/* error occurred, return the errno */
				error = privcmd_xen2bsd_errno(error);
				hc->retval = 0;
			}
		} else {
			error = privcmd_xen2bsd_errno(error);
		}
		break;
	}
	case IOCTL_PRIVCMD_MMAP:
		return privcmd_mmap(ap);

	case IOCTL_PRIVCMD_MMAPBATCH:
		return privcmd_mmapbatch(ap);

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		return privcmd_mmapbatch_v2(ap);

	case IOCTL_PRIVCMD_MMAP_RESOURCE:
		return privcmd_mmap_resource(ap);

	case IOCTL_GNTDEV_MMAP_GRANT_REF:
		return privcmd_map_gref(ap);

	case IOCTL_GNTDEV_ALLOC_GRANT_REF:
		return privcmd_alloc_gref(ap);
	default:
		error = EINVAL;
	}

	return error;
}
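
/*
 * Illustrative userland sketch (an assumption, not part of this driver):
 * issuing a harmless hypercall through IOCTL_PRIVCMD_HYPERCALL.  The
 * __HYPERVISOR_xen_version hypercall with the XENVER_version sub-op takes
 * no buffer and returns the hypervisor version packed as
 * (major << 16) | minor, which the handler above stores in hc.retval.
 *
 *	privcmd_hypercall_t hc;
 *
 *	memset(&hc, 0, sizeof(hc));
 *	hc.op = __HYPERVISOR_xen_version;
 *	hc.arg[0] = XENVER_version;
 *	if (ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hc) == 0)
 *		printf("Xen %lu.%lu\n",
 *		    (u_long)hc.retval >> 16, (u_long)hc.retval & 0xffff);
 */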

static const struct uvm_pagerops privpgops = {
	.pgo_reference = privpgop_reference,
	.pgo_detach = privpgop_detach,
	.pgo_fault = privpgop_fault,
};

static void
privpgop_reference(struct uvm_object *uobj)
{
	rw_enter(uobj->vmobjlock, RW_WRITER);
	uobj->uo_refs++;
	rw_exit(uobj->vmobjlock);
}

static void
privcmd_notify(struct ioctl_gntdev_grant_notify *notify, vaddr_t va,
    struct gnttab_map_grant_ref *gmops)
{
	if (notify->action & UNMAP_NOTIFY_SEND_EVENT) {
		hypervisor_notify_via_evtchn(notify->event_channel_port);
	}
	if ((notify->action & UNMAP_NOTIFY_CLEAR_BYTE) == 0) {
		notify->action = 0;
		return;
	}
	if (va == 0) {
		struct gnttab_map_grant_ref op;
		struct gnttab_unmap_grant_ref uop;
		int i = notify->offset / PAGE_SIZE;
		int o = notify->offset % PAGE_SIZE;
		int err;
#ifndef XENPV
		paddr_t base_paddr;
		base_paddr = xenmem_alloc_pa(PAGE_SIZE, PAGE_SIZE, true);
#endif

		KASSERT(gmops != NULL);
		va = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
		    UVM_KMF_VAONLY | UVM_KMF_WAITVA);
#ifndef XENPV
		op.host_addr = base_paddr;
#else
		op.host_addr = va;
#endif
		op.dev_bus_addr = 0;
		op.ref = gmops[i].ref;
		op.dom = gmops[i].dom;
		op.handle = -1;
		op.flags = GNTMAP_host_map;
		err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
		if (err == 0 && op.status == GNTST_okay) {
#ifndef XENPV
			pmap_kenter_pa(va, base_paddr,
			    VM_PROT_READ | VM_PROT_WRITE, 0);
#endif
			char *n = (void *)(va + o);
			*n = 0;
#ifndef XENPV
			pmap_kremove(va, PAGE_SIZE);
			uop.host_addr = base_paddr;
#else
			uop.host_addr = va;
#endif
			uop.handle = op.handle;
			uop.dev_bus_addr = 0;
			(void)HYPERVISOR_grant_table_op(
			    GNTTABOP_unmap_grant_ref, &uop, 1);
		}
		uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY);
#ifndef XENPV
		xenmem_free_pa(base_paddr, PAGE_SIZE);
#endif
	} else {
		KASSERT(gmops == NULL);
		char *n = (void *)(va + notify->offset);
		*n = 0;
	}
	notify->action = 0;
}
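
/*
 * Informative note (assumption about the userland side): the gntdev ioctls
 * accept a struct ioctl_gntdev_grant_notify so a client can request cleanup
 * actions when the mapping goes away, e.g.
 *
 *	mgr.notify.offset = 0;
 *	mgr.notify.action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT;
 *	mgr.notify.event_channel_port = port;
 *
 * which asks for the selected byte to be cleared and an event to be sent on
 * the given port.  privcmd_notify() above performs both actions from
 * privpgop_detach() when the last reference to the object is dropped.
 */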

static void
privpgop_detach(struct uvm_object *uobj)
{
	struct privcmd_object *pobj = (struct privcmd_object *)uobj;

	rw_enter(uobj->vmobjlock, RW_WRITER);
	KASSERT(uobj->uo_refs > 0);
	if (uobj->uo_refs > 1) {
		uobj->uo_refs--;
		rw_exit(uobj->vmobjlock);
		return;
	}
	rw_exit(uobj->vmobjlock);
	switch (pobj->type) {
	case PTYPE_PRIVCMD_PHYSMAP:
#ifndef XENPV
		for (int i = 0; i < pobj->npages; i++) {
			if (pobj->u.pc.maddr[i] != INVALID_PAGE) {
				struct xen_remove_from_physmap rm;
				rm.domid = DOMID_SELF;
				rm.gpfn = pobj->u.pc.maddr[i] >> PGSHIFT;
				HYPERVISOR_memory_op(
				    XENMEM_remove_from_physmap, &rm);
			}
		}
		xenmem_free_pa(pobj->u.pc.base_paddr, pobj->npages * PAGE_SIZE);
#endif
		/* FALLTHROUGH */
	case PTYPE_PRIVCMD:
		kmem_free(pobj->u.pc.maddr, sizeof(paddr_t) * pobj->npages);
		uvm_obj_destroy(uobj, true);
		kmem_free(pobj, sizeof(struct privcmd_object));
		break;
	case PTYPE_GNTDEV_REF:
	    {
		privcmd_notify(&pobj->u.gr.notify, 0, pobj->u.gr.ops);
#ifndef XENPV
		KASSERT(pobj->u.gr.base_paddr != 0);
		for (int i = 0; i < pobj->npages; i++) {
			struct xen_remove_from_physmap rm;
			rm.domid = DOMID_SELF;
			rm.gpfn = (pobj->u.gr.base_paddr >> PGSHIFT) + i;
			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
		}
		xenmem_free_pa(pobj->u.gr.base_paddr, pobj->npages * PAGE_SIZE);
#endif
		kmem_free(pobj, PGO_GNTREF_LEN(pobj->npages));
		break;
	    }
	case PTYPE_GNTDEV_ALLOC:
		privcmd_notify(&pobj->u.ga.notify, pobj->u.ga.gntva, NULL);
		for (int i = 0; i < pobj->npages; ++i) {
			xengnt_revoke_access(pobj->u.ga.gref_ids[i]);
		}
		uvm_km_free(kernel_map, pobj->u.ga.gntva,
		    PAGE_SIZE * pobj->npages, UVM_KMF_WIRED);
		kmem_free(pobj, PGO_GNTA_LEN(pobj->npages));
	}
	privcmd_nobjects--;
}

static int
privpgop_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
    int npages, int centeridx, vm_prot_t access_type, int flags)
{
	struct vm_map_entry *entry = ufi->entry;
	struct uvm_object *uobj = entry->object.uvm_obj;
	struct privcmd_object *pobj = (struct privcmd_object*)uobj;
	int maddr_i, i, error = 0;

	/* compute offset from start of map */
	maddr_i = (entry->offset + (vaddr - entry->start)) >> PAGE_SHIFT;
	if (maddr_i + npages > pobj->npages) {
		return EINVAL;
	}
	for (i = 0; i < npages; i++, maddr_i++, vaddr += PAGE_SIZE) {
		if ((flags & PGO_ALLPAGES) == 0 && i != centeridx)
			continue;
		if (pps[i] == PGO_DONTCARE)
			continue;
		switch(pobj->type) {
		case PTYPE_PRIVCMD:
		case PTYPE_PRIVCMD_PHYSMAP:
		    {
			u_int pm_flags = PMAP_CANFAIL | ufi->entry->protection;
#ifdef XENPV
			if (pobj->u.pc.no_translate)
				pm_flags |= PMAP_MD_XEN_NOTR;
#endif
			if (pobj->u.pc.maddr[maddr_i] == INVALID_PAGE) {
				/* This has already been flagged as error. */
				error = EFAULT;
				goto out;
			}
			error = pmap_enter_ma(ufi->orig_map->pmap, vaddr,
			    pobj->u.pc.maddr[maddr_i], 0,
			    ufi->entry->protection, pm_flags,
			    pobj->u.pc.domid);
			if (error == ENOMEM) {
				goto out;
			}
			if (error) {
				pobj->u.pc.maddr[maddr_i] = INVALID_PAGE;
				error = EFAULT;
			}
			break;
		    }
		case PTYPE_GNTDEV_REF:
		    {
			struct pmap *pmap = ufi->orig_map->pmap;
			if (pmap_enter_gnt(pmap, vaddr, entry->start,
			    pobj->npages, &pobj->u.gr.ops[0]) != GNTST_okay) {
				error = EFAULT;
				goto out;
			}
			break;
		    }
		case PTYPE_GNTDEV_ALLOC:
		    {
			paddr_t pa;
			if (!pmap_extract(pmap_kernel(),
			    pobj->u.ga.gntva + maddr_i * PAGE_SIZE, &pa)) {
				error = EFAULT;
				goto out;
			}
			error = pmap_enter(ufi->orig_map->pmap, vaddr, pa,
			    ufi->entry->protection,
			    PMAP_CANFAIL | ufi->entry->protection);
			if (error == ENOMEM) {
				goto out;
			}
			break;
		    }
		}
		if (error) {
			/* XXX for proper ptp accountings */
			pmap_remove(ufi->orig_map->pmap, vaddr,
			    vaddr + PAGE_SIZE);
		}
	}
out:
	pmap_update(ufi->orig_map->pmap);
	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
	return error;
}

static int
privcmd_map_obj(struct vm_map *map, vaddr_t start, struct privcmd_object *obj,
    vm_prot_t prot)
{
	int error;
	uvm_flag_t uvmflag;
	vaddr_t newstart = start;
	off_t size = ((off_t)obj->npages << PGSHIFT);

	privcmd_nobjects++;
	uvm_obj_init(&obj->uobj, &privpgops, true, 1);
	uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED | UVM_FLAG_UNMAP | UVM_FLAG_NOMERGE);
	error = uvm_map(map, &newstart, size, &obj->uobj, 0, 0, uvmflag);

	if (error)
		obj->uobj.pgops->pgo_detach(&obj->uobj);
	return error;
}

static const struct kernfs_fileop privcmd_fileops[] = {
	{ .kf_fileop = KERNFS_FILEOP_IOCTL, .kf_vop = privcmd_ioctl },
};

void
xenprivcmd_init(void)
{
	kernfs_entry_t *dkt;
	kfstype kfst;

	if (!xendomain_is_privileged())
		return;

	kfst = KERNFS_ALLOCTYPE(privcmd_fileops);

	KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
	KERNFS_INITENTRY(dkt, DT_REG, "privcmd", NULL, kfst, VREG,
	    PRIVCMD_MODE);
	kernfs_addentry(kernxen_pkt, dkt);
}