1 /* $NetBSD: uvm_mmap.c,v 1.189 2026/03/21 03:17:25 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * Copyright (c) 1991, 1993 The Regents of the University of California. 6 * Copyright (c) 1988 University of Utah. 7 * 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * the Systems Programming Group of the University of Utah Computer 12 * Science Department. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ 39 * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94 40 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp 41 */ 42 43 /* 44 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap 45 * function. 46 */ 47 48 #include <sys/cdefs.h> 49 __KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.189 2026/03/21 03:17:25 yamt Exp $"); 50 51 #include "opt_compat_netbsd.h" 52 #include "opt_pax.h" 53 54 #include <sys/param.h> 55 #include <sys/types.h> 56 #include <sys/file.h> 57 #include <sys/filedesc.h> 58 #include <sys/resourcevar.h> 59 #include <sys/mman.h> 60 #include <sys/pax.h> 61 62 #include <sys/syscallargs.h> 63 64 #include <uvm/uvm.h> 65 #include <uvm/uvm_device.h> 66 67 static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, 68 int, int, struct uvm_object *, voff_t, vsize_t); 69 70 static int 71 range_test(const struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap) 72 { 73 vaddr_t vm_min_address = vm_map_min(map); 74 vaddr_t vm_max_address = vm_map_max(map); 75 vaddr_t eaddr = addr + size; 76 int res = 0; 77 78 if (addr < vm_min_address) 79 return EINVAL; 80 if (eaddr > vm_max_address) 81 return ismmap ? EFBIG : EINVAL; 82 if (addr > eaddr) /* no wrapping! */ 83 return ismmap ? EOVERFLOW : EINVAL; 84 85 #ifdef MD_MMAP_RANGE_TEST 86 res = MD_MMAP_RANGE_TEST(addr, eaddr); 87 #endif 88 89 return res; 90 } 91 92 /* 93 * align the address to a page boundary, and adjust the size accordingly 94 */ 95 static int 96 round_and_check(const struct vm_map *map, vaddr_t *addr, vsize_t *size) 97 { 98 const vsize_t pageoff = (vsize_t)(*addr & PAGE_MASK); 99 100 *addr -= pageoff; 101 102 if (*size != 0) { 103 vsize_t orig = *size; 104 *size += pageoff; 105 *size = (vsize_t)round_page(*size); 106 if (*size < orig || *addr + *size < *addr) { 107 return ENOMEM; 108 } 109 } 110 111 return range_test(map, *addr, *size, false); 112 } 113 114 static int 115 mincore_chunk(struct vm_map *map, vaddr_t start, vaddr_t end, char *vec) 116 { 117 struct vm_page *pg; 118 char pgi; 119 struct uvm_object *uobj; 120 struct vm_amap *amap; 121 struct vm_anon *anon; 122 struct vm_map_entry *entry; 123 vaddr_t lim; 124 int error = 0; 125 126 vm_map_lock_read(map); 127 128 if (uvm_map_lookup_entry(map, start, &entry) == false) { 129 error = ENOMEM; 130 goto out; 131 } 132 133 for (/* nothing */; 134 entry != &map->header && entry->start < end; 135 entry = entry->next) { 136 KASSERT(!UVM_ET_ISSUBMAP(entry)); 137 KASSERT(start >= entry->start); 138 139 /* Make sure there are no holes. */ 140 if (entry->end < end && 141 (entry->next == &map->header || 142 entry->next->start > entry->end)) { 143 error = ENOMEM; 144 goto out; 145 } 146 147 lim = end < entry->end ? end : entry->end; 148 149 /* 150 * Special case for objects with no "real" pages. Those 151 * are always considered resident (mapped devices). 152 */ 153 154 if (UVM_ET_ISOBJ(entry)) { 155 KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)); 156 if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) { 157 for (/* nothing */; start < lim; 158 start += PAGE_SIZE, vec++) 159 *vec = 1; 160 continue; 161 } 162 } 163 164 amap = entry->aref.ar_amap; /* upper layer */ 165 uobj = entry->object.uvm_obj; /* lower layer */ 166 167 if (amap != NULL) 168 amap_lock(amap, RW_READER); 169 if (uobj != NULL) 170 rw_enter(uobj->vmobjlock, RW_READER); 171 172 for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) { 173 pgi = 0; 174 if (amap != NULL) { 175 /* Check the upper layer first. */ 176 anon = amap_lookup(&entry->aref, 177 start - entry->start); 178 /* Don't need to lock anon here. */ 179 if (anon != NULL && anon->an_page != NULL) { 180 181 /* 182 * Anon has the page for this entry 183 * offset. 184 */ 185 186 pgi = 1; 187 } 188 } 189 if (uobj != NULL && pgi == 0) { 190 /* Check the lower layer. */ 191 pg = uvm_pagelookup(uobj, 192 entry->offset + (start - entry->start)); 193 if (pg != NULL) { 194 195 /* 196 * Object has the page for this entry 197 * offset. 198 */ 199 200 pgi = 1; 201 } 202 } 203 *vec = pgi; 204 } 205 if (uobj != NULL) 206 rw_exit(uobj->vmobjlock); 207 if (amap != NULL) 208 amap_unlock(amap); 209 } 210 211 out: 212 vm_map_unlock_read(map); 213 return error; 214 } 215 216 #define MINCORE_CHUNK 256 /* number of pages to process at once */ 217 218 /* 219 * sys_mincore: determine if pages are in core or not. 220 */ 221 222 /* ARGSUSED */ 223 int 224 sys_mincore(struct lwp *l, const struct sys_mincore_args *uap, 225 register_t *retval) 226 { 227 /* { 228 syscallarg(void *) addr; 229 syscallarg(size_t) len; 230 syscallarg(char *) vec; 231 } */ 232 struct proc *p = l->l_proc; 233 struct vm_map *map; 234 vsize_t start, end, len; 235 char *vec; 236 vsize_t pgoff; 237 vsize_t pglen; 238 char *buf; 239 size_t bufsize; 240 int error; 241 242 map = &p->p_vmspace->vm_map; 243 244 start = (vaddr_t)SCARG(uap, addr); 245 len = SCARG(uap, len); 246 vec = SCARG(uap, vec); 247 248 if (start & PAGE_MASK) 249 return EINVAL; 250 len = round_page(len); 251 end = start + len; 252 if (end <= start) 253 return EINVAL; 254 255 bufsize = MINCORE_CHUNK; 256 buf = kmem_alloc(bufsize, KM_SLEEP); 257 error = 0; 258 pgoff = 0; 259 pglen = len / PAGE_SIZE; 260 while (pgoff < pglen) { 261 vsize_t npgs = MIN(pglen - pgoff, MINCORE_CHUNK); 262 vaddr_t cstart = start + pgoff * PAGE_SIZE; 263 vaddr_t cend = cstart + npgs * PAGE_SIZE; 264 error = mincore_chunk(map, cstart, cend, buf); 265 if (error != 0) { 266 break; 267 } 268 error = copyout(buf, vec + pgoff, npgs); 269 if (error != 0) { 270 break; 271 } 272 pgoff += npgs; 273 } 274 kmem_free(buf, bufsize); 275 return error; 276 } 277 278 /* 279 * sys_mmap: mmap system call. 280 * 281 * => file offset and address may not be page aligned 282 * - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE 283 * - if address isn't page aligned the mapping starts at trunc_page(addr) 284 * and the return value is adjusted up by the page offset. 285 */ 286 287 int 288 sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval) 289 { 290 /* { 291 syscallarg(void *) addr; 292 syscallarg(size_t) len; 293 syscallarg(int) prot; 294 syscallarg(int) flags; 295 syscallarg(int) fd; 296 syscallarg(long) pad; 297 syscallarg(off_t) pos; 298 } */ 299 struct proc *p = l->l_proc; 300 vaddr_t addr; 301 off_t pos; 302 vsize_t size, pageoff; 303 vm_prot_t prot, maxprot, extraprot; 304 int flags, fd, advice; 305 vaddr_t defaddr = 0; /* XXXGCC */ 306 bool addrhint = false; 307 struct file *fp = NULL; 308 struct uvm_object *uobj; 309 int error; 310 vaddr_t orig_addr; 311 312 /* 313 * first, extract syscall args from the uap. 314 */ 315 316 addr = (vaddr_t)SCARG(uap, addr); 317 size = (vsize_t)SCARG(uap, len); 318 prot = SCARG(uap, prot) & VM_PROT_ALL; 319 extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot)); 320 flags = SCARG(uap, flags); 321 fd = SCARG(uap, fd); 322 pos = SCARG(uap, pos); 323 324 orig_addr = addr; 325 326 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE)) 327 return EINVAL; 328 329 if (size == 0 && (flags & MAP_ANON) == 0) 330 return EINVAL; 331 332 /* 333 * Align file position and save offset into page. Adjust size 334 * so that it is an integral multiple of the page size. 335 */ 336 pageoff = pos & PAGE_MASK; 337 pos -= pageoff; 338 KASSERT(PAGE_MASK <= __type_max(vsize_t)); 339 KASSERT((__type_max(vsize_t) - PAGE_SIZE + 1) % PAGE_SIZE == 0); 340 if (size > __type_max(vsize_t) - PAGE_SIZE + 1 - pageoff) 341 return ENOMEM; 342 /* 343 * size + pageoff <= VSIZE_MAX + 1 - PAGE_SIZE, and the 344 * right-hand side is an integral multiple of the page size, so 345 * round_page(size + pageoff) <= VSIZE_MAX + 1 - PAGE_SIZE. 346 */ 347 size = round_page(size + pageoff); 348 349 /* 350 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" 351 */ 352 if (flags & MAP_FIXED) { 353 /* ensure address and file offset are aligned properly */ 354 addr -= pageoff; 355 if (addr & PAGE_MASK) 356 return EINVAL; 357 358 error = range_test(&p->p_vmspace->vm_map, addr, size, true); 359 if (error) { 360 return error; 361 } 362 } else if (addr == 0 || !(flags & MAP_TRYFIXED)) { 363 /* 364 * not fixed: make sure we skip over the largest 365 * possible heap for non-topdown mapping arrangements. 366 * we will refine our guess later (e.g. to account for 367 * VAC, etc) 368 */ 369 370 defaddr = p->p_emul->e_vm_default_addr(p, 371 (vaddr_t)p->p_vmspace->vm_daddr, size, 372 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN); 373 374 if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN)) 375 addr = MAX(addr, defaddr); 376 else 377 addr = MIN(addr, defaddr); 378 379 /* 380 * If addr is nonzero and not the default, then the 381 * address is a hint. 382 */ 383 addrhint = (addr != 0 && addr != defaddr); 384 } 385 386 /* 387 * check for file mappings (i.e. not anonymous) and verify file. 388 */ 389 390 advice = UVM_ADV_NORMAL; 391 if ((flags & MAP_ANON) == 0) { 392 KASSERT(size != 0); 393 394 if ((fp = fd_getfile(fd)) == NULL) 395 return EBADF; 396 397 if (fp->f_ops->fo_mmap == NULL) { 398 error = ENODEV; 399 goto out; 400 } 401 error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags, 402 &advice, &uobj, &maxprot); 403 if (error) { 404 goto out; 405 } 406 if (uobj == NULL) { 407 flags |= MAP_ANON; 408 fd_putfile(fd); 409 fp = NULL; 410 goto is_anon; 411 } 412 } else { /* MAP_ANON case */ 413 /* 414 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0? 415 */ 416 if (fd != -1) 417 return EINVAL; 418 419 is_anon: /* label for SunOS style /dev/zero */ 420 uobj = NULL; 421 maxprot = VM_PROT_ALL; 422 pos = 0; 423 } 424 425 maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot); 426 if (((prot | extraprot) & maxprot) != (prot | extraprot)) { 427 error = EACCES; 428 goto out; 429 } 430 if ((error = PAX_MPROTECT_VALIDATE(l, prot))) 431 goto out; 432 433 pax_aslr_mmap(l, &addr, orig_addr, flags); 434 435 /* 436 * Now let kernel internal function uvm_mmap do the work. 437 * 438 * If the user provided a hint, take a reference to uobj in 439 * case the first attempt to satisfy the hint fails, so we can 440 * try again with the default address. 441 */ 442 if (addrhint) { 443 if (uobj) 444 (*uobj->pgops->pgo_reference)(uobj); 445 } 446 error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, 447 flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); 448 if (addrhint) { 449 if (error) { 450 addr = defaddr; 451 pax_aslr_mmap(l, &addr, orig_addr, flags); 452 error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, 453 prot, maxprot, flags, advice, uobj, pos, 454 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); 455 } else if (uobj) { 456 /* Release the exta reference we took. */ 457 (*uobj->pgops->pgo_detach)(uobj); 458 } 459 } 460 461 /* remember to add offset */ 462 *retval = (register_t)(addr + pageoff); 463 464 out: 465 if (fp != NULL) 466 fd_putfile(fd); 467 468 return error; 469 } 470 471 /* 472 * sys___msync13: the msync system call (a front-end for flush) 473 */ 474 475 int 476 sys___msync13(struct lwp *l, const struct sys___msync13_args *uap, 477 register_t *retval) 478 { 479 /* { 480 syscallarg(void *) addr; 481 syscallarg(size_t) len; 482 syscallarg(int) flags; 483 } */ 484 struct proc *p = l->l_proc; 485 vaddr_t addr; 486 vsize_t size; 487 struct vm_map *map; 488 int error, flags, uvmflags; 489 bool rv; 490 491 /* 492 * extract syscall args from the uap 493 */ 494 495 addr = (vaddr_t)SCARG(uap, addr); 496 size = (vsize_t)SCARG(uap, len); 497 flags = SCARG(uap, flags); 498 499 /* sanity check flags */ 500 if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 || 501 (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 || 502 (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC)) 503 return EINVAL; 504 if ((flags & (MS_ASYNC | MS_SYNC)) == 0) 505 flags |= MS_SYNC; 506 507 /* 508 * get map 509 */ 510 map = &p->p_vmspace->vm_map; 511 512 if (round_and_check(map, &addr, &size)) 513 return ENOMEM; 514 515 /* 516 * XXXCDC: do we really need this semantic? 517 * 518 * XXX Gak! If size is zero we are supposed to sync "all modified 519 * pages with the region containing addr". Unfortunately, we 520 * don't really keep track of individual mmaps so we approximate 521 * by flushing the range of the map entry containing addr. 522 * This can be incorrect if the region splits or is coalesced 523 * with a neighbor. 524 */ 525 526 if (size == 0) { 527 struct vm_map_entry *entry; 528 529 vm_map_lock_read(map); 530 rv = uvm_map_lookup_entry(map, addr, &entry); 531 if (rv == true) { 532 addr = entry->start; 533 size = entry->end - entry->start; 534 } 535 vm_map_unlock_read(map); 536 if (rv == false) 537 return EINVAL; 538 } 539 540 /* 541 * translate MS_ flags into PGO_ flags 542 */ 543 544 uvmflags = PGO_CLEANIT; 545 if (flags & MS_INVALIDATE) 546 uvmflags |= PGO_FREE; 547 if (flags & MS_SYNC) 548 uvmflags |= PGO_SYNCIO; 549 550 error = uvm_map_clean(map, addr, addr+size, uvmflags); 551 return error; 552 } 553 554 /* 555 * sys_munmap: unmap a users memory 556 */ 557 558 int 559 sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval) 560 { 561 /* { 562 syscallarg(void *) addr; 563 syscallarg(size_t) len; 564 } */ 565 struct proc *p = l->l_proc; 566 vaddr_t addr; 567 vsize_t size; 568 struct vm_map *map; 569 struct vm_map_entry *dead_entries; 570 571 /* 572 * get syscall args. 573 */ 574 575 addr = (vaddr_t)SCARG(uap, addr); 576 size = (vsize_t)SCARG(uap, len); 577 578 map = &p->p_vmspace->vm_map; 579 580 if (round_and_check(map, &addr, &size)) 581 return EINVAL; 582 583 if (size == 0) 584 return 0; 585 586 vm_map_lock(map); 587 #if 0 588 /* 589 * interesting system call semantic: make sure entire range is 590 * allocated before allowing an unmap. 591 */ 592 if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) { 593 vm_map_unlock(map); 594 return EINVAL; 595 } 596 #endif 597 uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0); 598 vm_map_unlock(map); 599 if (dead_entries != NULL) 600 uvm_unmap_detach(dead_entries, 0); 601 return 0; 602 } 603 604 /* 605 * sys_mprotect: the mprotect system call 606 */ 607 608 int 609 sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap, 610 register_t *retval) 611 { 612 /* { 613 syscallarg(void *) addr; 614 syscallarg(size_t) len; 615 syscallarg(int) prot; 616 } */ 617 struct proc *p = l->l_proc; 618 vaddr_t addr; 619 vsize_t size; 620 vm_prot_t prot; 621 int error; 622 623 /* 624 * extract syscall args from uap 625 */ 626 627 addr = (vaddr_t)SCARG(uap, addr); 628 size = (vsize_t)SCARG(uap, len); 629 prot = SCARG(uap, prot) & VM_PROT_ALL; 630 631 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size)) 632 return EINVAL; 633 634 error = uvm_map_protect_user(l, addr, addr + size, prot); 635 return error; 636 } 637 638 /* 639 * sys_minherit: the minherit system call 640 */ 641 642 int 643 sys_minherit(struct lwp *l, const struct sys_minherit_args *uap, 644 register_t *retval) 645 { 646 /* { 647 syscallarg(void *) addr; 648 syscallarg(int) len; 649 syscallarg(int) inherit; 650 } */ 651 struct proc *p = l->l_proc; 652 vaddr_t addr; 653 vsize_t size; 654 vm_inherit_t inherit; 655 int error; 656 657 addr = (vaddr_t)SCARG(uap, addr); 658 size = (vsize_t)SCARG(uap, len); 659 inherit = SCARG(uap, inherit); 660 661 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size)) 662 return EINVAL; 663 664 error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size, 665 inherit); 666 return error; 667 } 668 669 /* 670 * sys_madvise: give advice about memory usage. 671 */ 672 673 /* ARGSUSED */ 674 int 675 sys_madvise(struct lwp *l, const struct sys_madvise_args *uap, 676 register_t *retval) 677 { 678 /* { 679 syscallarg(void *) addr; 680 syscallarg(size_t) len; 681 syscallarg(int) behav; 682 } */ 683 struct proc *p = l->l_proc; 684 vaddr_t addr; 685 vsize_t size; 686 int advice, error; 687 688 addr = (vaddr_t)SCARG(uap, addr); 689 size = (vsize_t)SCARG(uap, len); 690 advice = SCARG(uap, behav); 691 692 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size)) 693 return EINVAL; 694 695 switch (advice) { 696 case MADV_NORMAL: 697 case MADV_RANDOM: 698 case MADV_SEQUENTIAL: 699 error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size, 700 advice); 701 break; 702 703 case MADV_WILLNEED: 704 705 /* 706 * Activate all these pages, pre-faulting them in if 707 * necessary. 708 */ 709 error = uvm_map_willneed(&p->p_vmspace->vm_map, 710 addr, addr + size); 711 break; 712 713 case MADV_DONTNEED: 714 715 /* 716 * Deactivate all these pages. We don't need them 717 * any more. We don't, however, toss the data in 718 * the pages. 719 */ 720 721 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, 722 PGO_DEACTIVATE); 723 break; 724 725 case MADV_FREE: 726 727 /* 728 * These pages contain no valid data, and may be 729 * garbage-collected. Toss all resources, including 730 * any swap space in use. 731 */ 732 733 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, 734 PGO_FREE); 735 break; 736 737 case MADV_SPACEAVAIL: 738 739 /* 740 * XXXMRG What is this? I think it's: 741 * 742 * Ensure that we have allocated backing-store 743 * for these pages. 744 * 745 * This is going to require changes to the page daemon, 746 * as it will free swap space allocated to pages in core. 747 * There's also what to do for device/file/anonymous memory. 748 */ 749 750 return EINVAL; 751 752 default: 753 return EINVAL; 754 } 755 756 return error; 757 } 758 759 /* 760 * sys_mlock: memory lock 761 */ 762 763 int 764 sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval) 765 { 766 /* { 767 syscallarg(const void *) addr; 768 syscallarg(size_t) len; 769 } */ 770 struct proc *p = l->l_proc; 771 vaddr_t addr; 772 vsize_t size; 773 int error; 774 775 /* 776 * extract syscall args from uap 777 */ 778 779 addr = (vaddr_t)SCARG(uap, addr); 780 size = (vsize_t)SCARG(uap, len); 781 782 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size)) 783 return ENOMEM; 784 785 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) 786 return EAGAIN; 787 788 if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > 789 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) 790 return EAGAIN; 791 792 error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false, 793 0); 794 if (error == EFAULT) 795 error = ENOMEM; 796 return error; 797 } 798 799 /* 800 * sys_munlock: unlock wired pages 801 */ 802 803 int 804 sys_munlock(struct lwp *l, const struct sys_munlock_args *uap, 805 register_t *retval) 806 { 807 /* { 808 syscallarg(const void *) addr; 809 syscallarg(size_t) len; 810 } */ 811 struct proc *p = l->l_proc; 812 vaddr_t addr; 813 vsize_t size; 814 815 /* 816 * extract syscall args from uap 817 */ 818 819 addr = (vaddr_t)SCARG(uap, addr); 820 size = (vsize_t)SCARG(uap, len); 821 822 if (round_and_check(&p->p_vmspace->vm_map, &addr, &size)) 823 return ENOMEM; 824 825 if (uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true, 0)) 826 return ENOMEM; 827 828 return 0; 829 } 830 831 /* 832 * sys_mlockall: lock all pages mapped into an address space. 833 */ 834 835 int 836 sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap, 837 register_t *retval) 838 { 839 /* { 840 syscallarg(int) flags; 841 } */ 842 struct proc *p = l->l_proc; 843 int error, flags; 844 845 flags = SCARG(uap, flags); 846 847 if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0) 848 return EINVAL; 849 850 error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags, 851 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); 852 return error; 853 } 854 855 /* 856 * sys_munlockall: unlock all pages mapped into an address space. 857 */ 858 859 int 860 sys_munlockall(struct lwp *l, const void *v, register_t *retval) 861 { 862 struct proc *p = l->l_proc; 863 864 (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0); 865 return 0; 866 } 867 868 /* 869 * uvm_mmap: internal version of mmap 870 * 871 * - used by sys_mmap and various framebuffers 872 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON 873 * - caller must page-align the file offset 874 * 875 * XXX This appears to leak the uobj in various error branches? Need 876 * to clean up the contract around uobj reference. 877 */ 878 879 static int 880 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot, 881 vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj, 882 voff_t foff, vsize_t locklimit) 883 { 884 vaddr_t align = 0; 885 int error; 886 uvm_flag_t uvmflag = 0; 887 888 /* 889 * check params 890 */ 891 892 if (size == 0) 893 return 0; 894 if (foff & PAGE_MASK) 895 return EINVAL; 896 if ((prot & maxprot) != prot) 897 return EINVAL; 898 899 /* 900 * for non-fixed mappings, round off the suggested address. 901 * for fixed mappings, check alignment. 902 */ 903 904 if ((flags & MAP_FIXED) == 0) { 905 *addr = round_page(*addr); 906 } else { 907 if (*addr & PAGE_MASK) 908 return EINVAL; 909 uvmflag |= UVM_FLAG_FIXED | UVM_FLAG_UNMAP; 910 } 911 912 /* 913 * Try to see if any requested alignment can even be attemped. 914 * Make sure we can express the alignment (asking for a >= 4GB 915 * alignment on an ILP32 architecture make no sense) and the 916 * alignment is at least for a page sized quantity. If the 917 * request was for a fixed mapping, make sure supplied address 918 * adheres to the request alignment. 919 */ 920 align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT; 921 if (align) { 922 if (align >= sizeof(vaddr_t) * NBBY) 923 return EINVAL; 924 align = 1UL << align; 925 if (align < PAGE_SIZE) 926 return EINVAL; 927 if (align >= vm_map_max(map)) 928 return ENOMEM; 929 if (flags & MAP_FIXED) { 930 if ((*addr & (align-1)) != 0) 931 return EINVAL; 932 align = 0; 933 } 934 } 935 936 /* 937 * check resource limits 938 */ 939 940 if (!VM_MAP_IS_KERNEL(map) && 941 (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) > 942 curproc->p_rlimit[RLIMIT_AS].rlim_cur)) 943 return ENOMEM; 944 945 /* 946 * handle anon vs. non-anon mappings. for non-anon mappings attach 947 * to underlying vm object. 948 */ 949 950 if (flags & MAP_ANON) { 951 KASSERT(uobj == NULL); 952 foff = UVM_UNKNOWN_OFFSET; 953 if ((flags & MAP_SHARED) == 0) 954 /* XXX: defer amap create */ 955 uvmflag |= UVM_FLAG_COPYONW; 956 else 957 /* shared: create amap now */ 958 uvmflag |= UVM_FLAG_OVERLAY; 959 960 } else { 961 KASSERT(uobj != NULL); 962 if ((flags & MAP_SHARED) == 0) { 963 uvmflag |= UVM_FLAG_COPYONW; 964 } 965 } 966 967 uvmflag = UVM_MAPFLAG(prot, maxprot, 968 (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice, 969 uvmflag); 970 error = uvm_map(map, addr, size, uobj, foff, align, uvmflag); 971 if (error) { 972 if (uobj) 973 uobj->pgops->pgo_detach(uobj); 974 return error; 975 } 976 977 /* 978 * POSIX 1003.1b -- if our address space was configured 979 * to lock all future mappings, wire the one we just made. 980 * 981 * Also handle the MAP_WIRED flag here. 982 */ 983 984 if (prot == VM_PROT_NONE) { 985 986 /* 987 * No more work to do in this case. 988 */ 989 990 return 0; 991 } 992 if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) { 993 vm_map_lock(map); 994 if (atop(size) + uvmexp.wired > uvmexp.wiredmax || 995 (locklimit != 0 && 996 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > 997 locklimit)) { 998 vm_map_unlock(map); 999 uvm_unmap(map, *addr, *addr + size); 1000 return ENOMEM; 1001 } 1002 1003 /* 1004 * uvm_map_pageable() always returns the map unlocked. 1005 */ 1006 1007 error = uvm_map_pageable(map, *addr, *addr + size, 1008 false, UVM_LK_ENTER); 1009 if (error) { 1010 uvm_unmap(map, *addr, *addr + size); 1011 return error; 1012 } 1013 return 0; 1014 } 1015 return 0; 1016 } 1017 1018 vaddr_t 1019 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown) 1020 { 1021 1022 if (topdown) 1023 return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz); 1024 else 1025 return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz); 1026 } 1027 1028 int 1029 uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev, 1030 off_t off) 1031 { 1032 struct uvm_object *uobj; 1033 int error, flags, prot; 1034 1035 KASSERT(len > 0); 1036 1037 flags = MAP_SHARED; 1038 prot = VM_PROT_READ | VM_PROT_WRITE; 1039 if (*addrp) 1040 flags |= MAP_FIXED; 1041 else 1042 *addrp = (void *)p->p_emul->e_vm_default_addr(p, 1043 (vaddr_t)p->p_vmspace->vm_daddr, len, 1044 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN); 1045 1046 uobj = udv_attach(dev, prot, off, len); 1047 if (uobj == NULL) 1048 return EINVAL; 1049 1050 error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp, 1051 (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off, 1052 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); 1053 return error; 1054 } 1055 1056 int 1057 uvm_mmap_anon(struct proc *p, void **addrp, size_t len) 1058 { 1059 int error, flags, prot; 1060 1061 flags = MAP_PRIVATE | MAP_ANON; 1062 prot = VM_PROT_READ | VM_PROT_WRITE; 1063 if (*addrp) 1064 flags |= MAP_FIXED; 1065 else 1066 *addrp = (void *)p->p_emul->e_vm_default_addr(p, 1067 (vaddr_t)p->p_vmspace->vm_daddr, len, 1068 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN); 1069 1070 error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp, 1071 (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0, 1072 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); 1073 return error; 1074 } 1075