/* $NetBSD: uvm_mmap.c,v 1.173 2019/08/06 08:10:27 maxv Exp $ */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.173 2019/08/06 08:10:27 maxv Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(const struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}
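
/*
 * Example (illustrative only): assuming a 32-bit vaddr_t, a request
 * with addr = 0xfffff000 and size = 0x2000 computes eaddr = 0x1000,
 * so addr > eaddr and the request is rejected -- EOVERFLOW for mmap
 * callers, EINVAL for everyone else.
 */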

/*
 * align the address to a page boundary, and adjust the size accordingly
 */
static int
round_and_check(const struct vm_map *map, vaddr_t *addr, vsize_t *size)
{
	const vsize_t pageoff = (vsize_t)(*addr & PAGE_MASK);

	*addr -= pageoff;

	if (*size != 0) {
		*size += pageoff;
		*size = (vsize_t)round_page(*size);
	} else if (*addr + *size < *addr) {
		return ENOMEM;
	}

	return range_test(map, *addr, *size, false);
}
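
/*
 * Example (illustrative only): assuming 4 KiB pages, *addr = 0x12345
 * and *size = 0x100 give pageoff = 0x345; *addr is truncated to
 * 0x12000 and *size grows to 0x445 before being rounded up to 0x1000,
 * i.e. the page-aligned range covering the original request.
 */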

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0;
	size_t npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	    entry != &map->header && entry->start < end;
	    entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		    entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				    start += PAGE_SIZE, vec++)
					ustore_char(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) ustore_char(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}
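
/*
 * Hypothetical userland usage of mincore(2), shown only as a sketch
 * (pgsz stands for the system page size, e.g. sysconf(_SC_PAGESIZE)):
 *
 *	char vec[4];
 *	void *p = mmap(NULL, 4 * pgsz, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	if (mincore(p, 4 * pgsz, vec) == 0) {
 *		... vec[i] != 0 means page i of the mapping is resident ...
 *	}
 */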

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot, extraprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot));
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return EINVAL;

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return ENOMEM;
	size = newsize;
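
	/*
	 * Example (illustrative only): assuming 4 KiB pages,
	 * pos = 0x12345678 yields pageoff = 0x678, pos is truncated to
	 * 0x12345000, and a 1-byte request becomes
	 * size = round_page(0x679) = 0x1000.  The page offset is added
	 * back to the returned address below.
	 */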

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {
		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return EINVAL;

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}
	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return EINVAL;

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot);
	if (((prot | extraprot) & maxprot) != (prot | extraprot)) {
		error = EACCES;
		goto out;
	}
	if ((error = PAX_MPROTECT_VALIDATE(l, prot)))
		goto out;

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return error;
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return EINVAL;
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	if (round_and_check(map, &addr, &size))
		return ENOMEM;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages within the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return EINVAL;
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}
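
/*
 * Hypothetical userland usage of msync(2), shown only as a sketch:
 * synchronously flush a mapped file window and invalidate the cached
 * pages.
 *
 *	if (msync(p, len, MS_SYNC | MS_INVALIDATE) == -1)
 *		err(1, "msync");
 */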

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	map = &p->p_vmspace->vm_map;

	if (round_and_check(map, &addr, &size))
		return EINVAL;

	if (size == 0)
		return 0;

	vm_map_lock(map);
#if 0
	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return EINVAL;
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return EINVAL;

	error = uvm_map_protect_user(l, addr, addr + size, prot);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return EINVAL;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return EINVAL;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return EINVAL;

	default:
		return EINVAL;
	}

	return error;
}
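
/*
 * Hypothetical userland usage of madvise(2), shown only as a sketch:
 * announce a sequential scan, then deactivate the pages once the data
 * has been consumed (the data itself is kept, per MADV_DONTNEED above).
 *
 *	(void)madvise(p, len, MADV_SEQUENTIAL);
 *	... read through p ...
 *	(void)madvise(p, len, MADV_DONTNEED);
 */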

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return ENOMEM;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return EAGAIN;

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return EAGAIN;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}
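
/*
 * Note that sys_mlock enforces two independent ceilings: the global
 * uvmexp.wiredmax, compared in pages (hence atop(size)), and the
 * per-process RLIMIT_MEMLOCK, compared in bytes against the pmap's
 * already-wired footprint (hence ptoa(pmap_wired_count(...))).
 * Example (illustrative only): with 4 KiB pages, wiring 1 MiB adds
 * atop(0x100000) = 256 pages to the global count.
 */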

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return ENOMEM;

	if (uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true, 0))
		return ENOMEM;

	return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return EINVAL;

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return 0;
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return 0;
	if (foff & PAGE_MASK)
		return EINVAL;
	if ((prot & maxprot) != prot)
		return EINVAL;

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return EINVAL;
		uvmflag |= UVM_FLAG_FIXED | UVM_FLAG_UNMAP;
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that
	 * the alignment is at least a page-sized quantity.  If the
	 * request was for a fixed mapping, make sure the supplied address
	 * adheres to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return EINVAL;
		align = 1L << align;
		if (align < PAGE_SIZE)
			return EINVAL;
		if (align >= vm_map_max(map))
			return ENOMEM;
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return EINVAL;
			align = 0;
		}
	}
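
	/*
	 * Example (illustrative only): MAP_ALIGNED(16) stores 16 in the
	 * MAP_ALIGNMENT_MASK bits, which the code above expands to a
	 * 1L << 16 = 64 KiB alignment request; values below PAGE_SIZE
	 * or too wide for a vaddr_t have already been rejected.
	 */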

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	    curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
	    uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return 0;
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		    size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		    locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return 0;
	}
	return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
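
/*
 * Hypothetical in-kernel usage of uvm_mmap_dev, shown only as a sketch
 * (dev is some character device known to the caller): map 64 KiB of
 * the device's space read/write, letting UVM choose the address.
 *
 *	void *va = NULL;
 *	error = uvm_mmap_dev(curproc, &va, 65536, dev, 0);
 */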

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
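
/*
 * Hypothetical in-kernel usage of uvm_mmap_anon, shown only as a
 * sketch: give curproc one page of zero-filled anonymous memory,
 * letting UVM choose the address.
 *
 *	void *va = NULL;
 *	error = uvm_mmap_anon(curproc, &va, PAGE_SIZE);
 */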