/*	$NetBSD: uvm_mmap.c,v 1.158 2016/05/24 20:20:57 martin Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.158 2016/05/24 20:20:57 martin Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

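/*
 * range_test: check that the proposed range [addr, addr + size) lies within
 * the limits of the given map and does not wrap around; machine-dependent
 * code gets a veto via MD_MMAP_RANGE_TEST when that macro is defined.
 * Returns 0 on success; on failure, mmap callers get EFBIG or EOVERFLOW
 * for the range/overflow checks and other callers get EINVAL.
 */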
static int
range_test(struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return (error);
}

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
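 *      (for example, with 4 KiB pages a MAP_FIXED request for address
 *      0x10234 at file offset 0x1234 maps the page at 0x10000 from file
 *      offset 0x1000 and returns 0x10234)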
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if (flags & MAP_COPY) {
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
#if defined(COMPAT_10) && defined(__i386__)
		/*
		 * Ancient kernels did not obey PROT_EXEC, on i386 at least,
		 * and ld.so did not turn it on.  We take care of this on
		 * amd64 in compat32.
		 */
		prot |= PROT_EXEC;
#endif
	}
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return (ENOMEM);
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}

	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {

		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 ||
		    !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return (EBADF);

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return (EINVAL);

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);


	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return error;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return (0);

	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return error;

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	vm_map_lock(map);
#if 0
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    false);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG  What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return (EINVAL);

	default:
		return (EINVAL);
	}

	return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
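 * - locklimit is the wired-memory limit (callers pass RLIMIT_MEMLOCK);
 *   it only applies if the new mapping ends up being wired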
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that the
	 * alignment is at least a page-sized quantity.  If the request
	 * was for a fixed mapping, make sure the supplied address adheres
	 * to the requested alignment.
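	 * (The value extracted below is the log2 of the requested alignment
	 * in bytes, so e.g. an encoded value of 16 asks for a 64 KiB
	 * boundary.)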
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return(EINVAL);
		align = 1L << align;
		if (align < PAGE_SIZE)
			return(EINVAL);
		if (align >= vm_map_max(map))
			return(ENOMEM);
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return(EINVAL);
			align = 0;
		}
	}

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	    curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return (0);
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return (0);
	}
	return 0;
}

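/*
 * uvm_default_mapaddr: return the default address hint for a new mapping
 * relative to "base", for either topdown or bottom-up address space layouts.
 */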
vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

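/*
 * uvm_mmap_dev: map a device into the given process's address space as a
 * shared, read/write mapping.  If *addrp is non-NULL it is used as a fixed
 * address; otherwise the emulation's default mapping address is chosen and
 * returned via *addrp.
 */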
int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM,
	    uobj, off, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

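/*
 * uvm_mmap_anon: map anonymous, private, read/write memory into the given
 * process's address space.  If *addrp is non-NULL it is used as a fixed
 * address; otherwise the emulation's default mapping address is chosen and
 * returned via *addrp.
 */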
int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL,
	    NULL, 0, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}