/*	$NetBSD: uvm_mmap.c,v 1.177 2022/03/27 20:18:05 hannken Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.177 2022/03/27 20:18:05 hannken Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(const struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
        vaddr_t vm_min_address = vm_map_min(map);
        vaddr_t vm_max_address = vm_map_max(map);
        vaddr_t eaddr = addr + size;
        int res = 0;

        if (addr < vm_min_address)
                return EINVAL;
        if (eaddr > vm_max_address)
                return ismmap ? EFBIG : EINVAL;
        if (addr > eaddr) /* no wrapping! */
                return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
        res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

        return res;
}

/*
 * align the address to a page boundary, and adjust the size accordingly
 */
static int
round_and_check(const struct vm_map *map, vaddr_t *addr, vsize_t *size)
{
        const vsize_t pageoff = (vsize_t)(*addr & PAGE_MASK);

        *addr -= pageoff;

        if (*size != 0) {
                *size += pageoff;
                *size = (vsize_t)round_page(*size);
        } else if (*addr + *size < *addr) {
                return ENOMEM;
        }

        return range_test(map, *addr, *size, false);
}
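
/*
 * A worked example of the rounding above (illustrative only, assuming
 * 4 KiB pages, i.e. PAGE_MASK == 0xfff): for *addr == 0x1234 and
 * *size == 0x100, pageoff is 0x234, so *addr becomes 0x1000 and *size
 * becomes round_page(0x334) == 0x1000.  The aligned range thus covers
 * every page touched by the original request.
 */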

/*
 * sys_mincore: determine if pages are in core or not.
 */
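
/*
 * Illustrative userland usage (a sketch, not part of this file): after
 * mapping n pages,
 *
 *	char vec[n];
 *	mincore(addr, n * PAGE_SIZE, vec);
 *
 * leaves vec[i] nonzero iff page i of the range is resident; untouched
 * anonymous pages will typically report 0.
 */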

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(char *) vec;
        } */
        struct proc *p = l->l_proc;
        struct vm_page *pg;
        char *vec, pgi;
        struct uvm_object *uobj;
        struct vm_amap *amap;
        struct vm_anon *anon;
        struct vm_map_entry *entry;
        vaddr_t start, end, lim;
        struct vm_map *map;
        vsize_t len;
        int error = 0;
        size_t npgs;

        map = &p->p_vmspace->vm_map;

        start = (vaddr_t)SCARG(uap, addr);
        len = SCARG(uap, len);
        vec = SCARG(uap, vec);

        if (start & PAGE_MASK)
                return EINVAL;
        len = round_page(len);
        end = start + len;
        if (end <= start)
                return EINVAL;

        /*
         * Lock down vec, so our returned status isn't outdated by
         * storing the status byte for a page.
         */

        npgs = len >> PAGE_SHIFT;
        error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
        if (error) {
                return error;
        }
        vm_map_lock_read(map);

        if (uvm_map_lookup_entry(map, start, &entry) == false) {
                error = ENOMEM;
                goto out;
        }

        for (/* nothing */;
             entry != &map->header && entry->start < end;
             entry = entry->next) {
                KASSERT(!UVM_ET_ISSUBMAP(entry));
                KASSERT(start >= entry->start);

                /* Make sure there are no holes. */
                if (entry->end < end &&
                    (entry->next == &map->header ||
                    entry->next->start > entry->end)) {
                        error = ENOMEM;
                        goto out;
                }

                lim = end < entry->end ? end : entry->end;

                /*
                 * Special case for objects with no "real" pages.  Those
                 * are always considered resident (mapped devices).
                 */

                if (UVM_ET_ISOBJ(entry)) {
                        KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
                        if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
                                for (/* nothing */; start < lim;
                                     start += PAGE_SIZE, vec++)
                                        ustore_char(vec, 1);
                                continue;
                        }
                }

                amap = entry->aref.ar_amap;	/* upper layer */
                uobj = entry->object.uvm_obj;	/* lower layer */

                if (amap != NULL)
                        amap_lock(amap, RW_READER);
                if (uobj != NULL)
                        rw_enter(uobj->vmobjlock, RW_READER);

                for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
                        pgi = 0;
                        if (amap != NULL) {
                                /* Check the upper layer first. */
                                anon = amap_lookup(&entry->aref,
                                    start - entry->start);
                                /* Don't need to lock anon here. */
                                if (anon != NULL && anon->an_page != NULL) {

                                        /*
                                         * Anon has the page for this entry
                                         * offset.
                                         */

                                        pgi = 1;
                                }
                        }
                        if (uobj != NULL && pgi == 0) {
                                /* Check the lower layer. */
                                pg = uvm_pagelookup(uobj,
                                    entry->offset + (start - entry->start));
                                if (pg != NULL) {

                                        /*
                                         * Object has the page for this entry
                                         * offset.
                                         */

                                        pgi = 1;
                                }
                        }
                        (void) ustore_char(vec, pgi);
                }
                if (uobj != NULL)
                        rw_exit(uobj->vmobjlock);
                if (amap != NULL)
                        amap_unlock(amap);
        }

 out:
        vm_map_unlock_read(map);
        uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
        return error;
}
/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */
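
/*
 * For example (an illustrative sketch, assuming 4 KiB pages): a call
 * with pos == 0x1234 and no MAP_FIXED maps starting at file offset
 * trunc_page(0x1234) == 0x1000 and returns base + 0x234, so the
 * returned pointer still addresses byte 0x1234 of the file.
 */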

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) prot;
                syscallarg(int) flags;
                syscallarg(int) fd;
                syscallarg(long) pad;
                syscallarg(off_t) pos;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        off_t pos;
        vsize_t size, pageoff, newsize;
        vm_prot_t prot, maxprot, extraprot;
        int flags, fd, advice;
        vaddr_t defaddr;
        struct file *fp = NULL;
        struct uvm_object *uobj;
        int error;
#ifdef PAX_ASLR
        vaddr_t orig_addr;
#endif /* PAX_ASLR */

        /*
         * first, extract syscall args from the uap.
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        prot = SCARG(uap, prot) & VM_PROT_ALL;
        extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot));
        flags = SCARG(uap, flags);
        fd = SCARG(uap, fd);
        pos = SCARG(uap, pos);

#ifdef PAX_ASLR
        orig_addr = addr;
#endif /* PAX_ASLR */

        if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
                return EINVAL;

        if (size == 0 && (flags & MAP_ANON) == 0)
                return EINVAL;

        /*
         * align file position and save offset.  adjust size.
         */

        pageoff = (pos & PAGE_MASK);
        pos -= pageoff;
        newsize = size + pageoff;		/* add offset */
        newsize = (vsize_t)round_page(newsize);	/* round up */

        if (newsize < size)
                return ENOMEM;
        size = newsize;

        /*
         * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
         */
        if (flags & MAP_FIXED) {
                /* ensure address and file offset are aligned properly */
                addr -= pageoff;
                if (addr & PAGE_MASK)
                        return EINVAL;

                error = range_test(&p->p_vmspace->vm_map, addr, size, true);
                if (error) {
                        return error;
                }
        } else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
                /*
                 * not fixed: make sure we skip over the largest
                 * possible heap for non-topdown mapping arrangements.
                 * we will refine our guess later (e.g. to account for
                 * VAC, etc)
                 */

                defaddr = p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, size,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

                if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
                        addr = MAX(addr, defaddr);
                else
                        addr = MIN(addr, defaddr);
        }

        /*
         * check for file mappings (i.e. not anonymous) and verify file.
         */

        advice = UVM_ADV_NORMAL;
        if ((flags & MAP_ANON) == 0) {
                if ((fp = fd_getfile(fd)) == NULL)
                        return EBADF;

                if (fp->f_ops->fo_mmap == NULL) {
                        error = ENODEV;
                        goto out;
                }
                error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
                    &advice, &uobj, &maxprot);
                if (error) {
                        goto out;
                }
                if (uobj == NULL) {
                        flags |= MAP_ANON;
                        fd_putfile(fd);
                        fp = NULL;
                        goto is_anon;
                }
        } else {		/* MAP_ANON case */
                /*
                 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
                 */
                if (fd != -1)
                        return EINVAL;

 is_anon:		/* label for SunOS style /dev/zero */
                uobj = NULL;
                maxprot = VM_PROT_ALL;
                pos = 0;
        }

        maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot);
        if (((prot | extraprot) & maxprot) != (prot | extraprot)) {
                error = EACCES;
                goto out;
        }
        if ((error = PAX_MPROTECT_VALIDATE(l, prot)))
                goto out;

        pax_aslr_mmap(l, &addr, orig_addr, flags);

        /*
         * now let kernel internal function uvm_mmap do the work.
         */

        error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
            flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

        /* remember to add offset */
        *retval = (register_t)(addr + pageoff);

 out:
        if (fp != NULL)
                fd_putfile(fd);

        return error;
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) flags;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size;
        struct vm_map *map;
        int error, flags, uvmflags;
        bool rv;

        /*
         * extract syscall args from the uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        flags = SCARG(uap, flags);

        /* sanity check flags */
        if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
            (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
            (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
                return EINVAL;
        if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
                flags |= MS_SYNC;

        /*
         * get map
         */
        map = &p->p_vmspace->vm_map;

        if (round_and_check(map, &addr, &size))
                return ENOMEM;

        /*
         * XXXCDC: do we really need this semantic?
         *
         * XXX Gak!  If size is zero we are supposed to sync "all modified
         * pages with the region containing addr".  Unfortunately, we
         * don't really keep track of individual mmaps so we approximate
         * by flushing the range of the map entry containing addr.
         * This can be incorrect if the region splits or is coalesced
         * with a neighbor.
         */

        if (size == 0) {
                struct vm_map_entry *entry;

                vm_map_lock_read(map);
                rv = uvm_map_lookup_entry(map, addr, &entry);
                if (rv == true) {
                        addr = entry->start;
                        size = entry->end - entry->start;
                }
                vm_map_unlock_read(map);
                if (rv == false)
                        return EINVAL;
        }

        /*
         * translate MS_ flags into PGO_ flags
         */

        uvmflags = PGO_CLEANIT;
        if (flags & MS_INVALIDATE)
                uvmflags |= PGO_FREE;
        if (flags & MS_SYNC)
                uvmflags |= PGO_SYNCIO;

        error = uvm_map_clean(map, addr, addr+size, uvmflags);
        return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size;
        struct vm_map *map;
        struct vm_map_entry *dead_entries;

        /*
         * get syscall args.
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        map = &p->p_vmspace->vm_map;

        if (round_and_check(map, &addr, &size))
                return EINVAL;

        if (size == 0)
                return 0;

        vm_map_lock(map);
#if 0
        /*
         * interesting system call semantic: make sure entire range is
         * allocated before allowing an unmap.
         */
        if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
                vm_map_unlock(map);
                return EINVAL;
        }
#endif
        uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
        vm_map_unlock(map);
        if (dead_entries != NULL)
                uvm_unmap_detach(dead_entries, 0);
        return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) prot;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size;
        vm_prot_t prot;
        int error;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        prot = SCARG(uap, prot) & VM_PROT_ALL;

        if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
                return EINVAL;

        error = uvm_map_protect_user(l, addr, addr + size, prot);
        return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(int) len;
                syscallarg(int) inherit;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size;
        vm_inherit_t inherit;
        int error;

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        inherit = SCARG(uap, inherit);

        if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
                return EINVAL;

        error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
            inherit);
        return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) behav;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size;
        int advice, error;

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        advice = SCARG(uap, behav);

        if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
                return EINVAL;

        switch (advice) {
        case MADV_NORMAL:
        case MADV_RANDOM:
        case MADV_SEQUENTIAL:
                error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
                    advice);
                break;

        case MADV_WILLNEED:

                /*
                 * Activate all these pages, pre-faulting them in if
                 * necessary.
                 */
                error = uvm_map_willneed(&p->p_vmspace->vm_map,
                    addr, addr + size);
                break;

        case MADV_DONTNEED:

                /*
                 * Deactivate all these pages.  We don't need them
                 * any more.  We don't, however, toss the data in
                 * the pages.
                 */

                error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
                    PGO_DEACTIVATE);
                break;

        case MADV_FREE:

                /*
                 * These pages contain no valid data, and may be
                 * garbage-collected.  Toss all resources, including
                 * any swap space in use.
                 */

                error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
                    PGO_FREE);
                break;

        case MADV_SPACEAVAIL:

                /*
                 * XXXMRG What is this?  I think it's:
                 *
                 *	Ensure that we have allocated backing-store
                 *	for these pages.
                 *
                 * This is going to require changes to the page daemon,
                 * as it will free swap space allocated to pages in core.
                 * There's also what to do for device/file/anonymous memory.
                 */

                return EINVAL;

        default:
                return EINVAL;
        }

        return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
        /* {
                syscallarg(const void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size;
        int error;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
                return ENOMEM;

        if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
                return EAGAIN;

        if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
                return EAGAIN;

        error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
            0);
        if (error == EFAULT)
                error = ENOMEM;
        return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(const void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
                return ENOMEM;

        if (uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true, 0))
                return ENOMEM;

        return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(int) flags;
        } */
        struct proc *p = l->l_proc;
        int error, flags;

        flags = SCARG(uap, flags);

        if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
                return EINVAL;

        error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return error;
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
        struct proc *p = l->l_proc;

        (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
        return 0;
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */
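
/*
 * Typical in-kernel usage mirrors uvm_mmap_dev() below: obtain a
 * struct uvm_object (e.g. from udv_attach()), choose or validate an
 * address, then call uvm_mmap() with a page-aligned foff and the
 * caller's RLIMIT_MEMLOCK value as locklimit.
 */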

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
        vaddr_t align = 0;
        int error;
        uvm_flag_t uvmflag = 0;

        /*
         * check params
         */

        if (size == 0)
                return 0;
        if (foff & PAGE_MASK)
                return EINVAL;
        if ((prot & maxprot) != prot)
                return EINVAL;

        /*
         * for non-fixed mappings, round off the suggested address.
         * for fixed mappings, check alignment.
         */

        if ((flags & MAP_FIXED) == 0) {
                *addr = round_page(*addr);
        } else {
                if (*addr & PAGE_MASK)
                        return EINVAL;
                uvmflag |= UVM_FLAG_FIXED | UVM_FLAG_UNMAP;
        }

        /*
         * Try to see if any requested alignment can even be attempted.
         * Make sure we can express the alignment (asking for a >= 4GB
         * alignment on an ILP32 architecture makes no sense) and that
         * the alignment is at least a page-sized quantity.  If the
         * request was for a fixed mapping, make sure the supplied
         * address adheres to the requested alignment.
         */
        align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
        if (align) {
                if (align >= sizeof(vaddr_t) * NBBY)
                        return EINVAL;
                align = 1UL << align;
                if (align < PAGE_SIZE)
                        return EINVAL;
                if (align >= vm_map_max(map))
                        return ENOMEM;
                if (flags & MAP_FIXED) {
                        if ((*addr & (align-1)) != 0)
                                return EINVAL;
                        align = 0;
                }
        }
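
        /*
         * For example (an illustrative sketch): a caller passing
         * MAP_ALIGNED(21) in flags encodes align == 21 here, requesting
         * a 2 MiB (1 << 21) aligned mapping; exponents below PAGE_SHIFT
         * or at least the pointer width in bits are rejected above.
         */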

        /*
         * check resource limits
         */

        if (!VM_MAP_IS_KERNEL(map) &&
            (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
            curproc->p_rlimit[RLIMIT_AS].rlim_cur))
                return ENOMEM;

        /*
         * handle anon vs. non-anon mappings.  for non-anon mappings attach
         * to underlying vm object.
         */

        if (flags & MAP_ANON) {
                KASSERT(uobj == NULL);
                foff = UVM_UNKNOWN_OFFSET;
                if ((flags & MAP_SHARED) == 0)
                        /* XXX: defer amap create */
                        uvmflag |= UVM_FLAG_COPYONW;
                else
                        /* shared: create amap now */
                        uvmflag |= UVM_FLAG_OVERLAY;

        } else {
                KASSERT(uobj != NULL);
                if ((flags & MAP_SHARED) == 0) {
                        uvmflag |= UVM_FLAG_COPYONW;
                }
        }

        uvmflag = UVM_MAPFLAG(prot, maxprot,
            (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
            uvmflag);
        error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
        if (error) {
                if (uobj)
                        uobj->pgops->pgo_detach(uobj);
                return error;
        }

        /*
         * POSIX 1003.1b -- if our address space was configured
         * to lock all future mappings, wire the one we just made.
         *
         * Also handle the MAP_WIRED flag here.
         */

        if (prot == VM_PROT_NONE) {

                /*
                 * No more work to do in this case.
                 */

                return 0;
        }
        if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
                vm_map_lock(map);
                if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
                    (locklimit != 0 &&
                    size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
                    locklimit)) {
                        vm_map_unlock(map);
                        uvm_unmap(map, *addr, *addr + size);
                        return ENOMEM;
                }

                /*
                 * uvm_map_pageable() always returns the map unlocked.
                 */

                error = uvm_map_pageable(map, *addr, *addr + size,
                    false, UVM_LK_ENTER);
                if (error) {
                        uvm_unmap(map, *addr, *addr + size);
                        return error;
                }
                return 0;
        }
        return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

        if (topdown)
                return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
        else
                return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
        struct uvm_object *uobj;
        int error, flags, prot;

        flags = MAP_SHARED;
        prot = VM_PROT_READ | VM_PROT_WRITE;
        if (*addrp)
                flags |= MAP_FIXED;
        else
                *addrp = (void *)p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, len,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

        uobj = udv_attach(dev, prot, off, len);
        if (uobj == NULL)
                return EINVAL;

        error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
            (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
        int error, flags, prot;

        flags = MAP_PRIVATE | MAP_ANON;
        prot = VM_PROT_READ | VM_PROT_WRITE;
        if (*addrp)
                flags |= MAP_FIXED;
        else
                *addrp = (void *)p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, len,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

        error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
            (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return error;
}