/*	$NetBSD: uvm_mmap.c,v 1.151 2015/01/10 23:35:02 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.151 2015/01/10 23:35:02 chs Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>

#if defined(PAX_ASLR) || defined(PAX_MPROTECT)
#include <sys/pax.h>
#endif /* PAX_ASLR || PAX_MPROTECT */

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);
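
/*
 * range_test: sanity check a user-supplied address range.  Reject ranges
 * that start below VM_MIN_ADDRESS, end above VM_MAXUSER_ADDRESS, or wrap
 * around the address space; mmap reports the more specific EFBIG/EOVERFLOW
 * errors, everything else gets EINVAL.  Machine-dependent code may reject
 * further ranges via MD_MMAP_RANGE_TEST.
 */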
static int
range_test(vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	vaddr_t vm_max_address = VM_MAXUSER_ADDRESS;
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);
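
		/*
		 * Walk the range one page at a time.  A page is reported
		 * as resident if either the upper (anon/amap) layer or the
		 * lower (uvm_object) layer currently has a page at the
		 * corresponding offset.
		 */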
		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return (error);
}

/*
 * sys_mmap: mmap system call.
 *
 * => the file offset and address may not be page aligned
 *    - if MAP_FIXED, the file offset and address must have the same
 *      remainder mod PAGE_SIZE
 *    - if the address isn't page aligned, the mapping starts at
 *      trunc_page(addr) and the return value is adjusted up by the
 *      page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if (flags & MAP_COPY) {
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
#if defined(COMPAT_10) && defined(__i386__)
		/*
		 * Ancient i386 kernels did not enforce PROT_EXEC, and the
		 * old ld.so did not turn it on, so grant it here for such
		 * binaries.  On amd64 the compat32 code takes care of this.
		 */
		prot |= PROT_EXEC;
#endif
	}
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	size += pageoff;			/* add offset */
	size = (vsize_t)round_page(size);	/* round up */

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		error = range_test(addr, size, true);
		if (error) {
			return error;
		}

	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {

		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size);

		if (addr == 0 ||
		    !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return (EBADF);

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
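		/*
		 * Let the file's fo_mmap method validate the request and
		 * hand back the uvm_object that will back the mapping; it
		 * may also adjust the offset, flags, advice and maximum
		 * protection.  A NULL uobj means we fall back to an
		 * anonymous mapping.
		 */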
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return (EINVAL);

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}
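
	/*
	 * Give PaX a chance to adjust the requested protections
	 * (PAX_MPROTECT) and to randomize the address hint (PAX_ASLR)
	 * before the mapping is established.
	 */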
#ifdef PAX_MPROTECT
	pax_mprotect(l, &prot, &maxprot);
#endif /* PAX_MPROTECT */

#ifdef PAX_ASLR
	pax_aslr(l, &addr, orig_addr, flags);
#endif /* PAX_ASLR */

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return (0);

	error = range_test(addr, size, false);
	if (error)
		return error;

	map = &p->p_vmspace->vm_map;

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	vm_map_lock(map);
#if 0
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    false);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return (EINVAL);

	default:
		return (EINVAL);
	}

	return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;
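
	/*
	 * Refuse the request if wiring these pages would exceed either the
	 * system-wide limit on wired pages or this process's RLIMIT_MEMLOCK
	 * limit.
	 */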
	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	}

	/*
	 * See whether the requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that
	 * it is at least a page-sized quantity.  If the request was for
	 * a fixed mapping, make sure the supplied address adheres to
	 * the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return(EINVAL);
		align = 1L << align;
		if (align < PAGE_SIZE)
			return(EINVAL);
		if (align >= vm_map_max(map))
			return(ENOMEM);
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return(EINVAL);
			align = 0;
		}
	}

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	     curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}
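
	/*
	 * Pack the protections, inheritance, advice and the flags collected
	 * above into a single uvm_flag_t for uvm_map().
	 */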
	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return (0);
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return (0);
	}
	return 0;
}
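
/*
 * uvm_default_mapaddr: return the default address hint for a new mapping,
 * chosen according to whether the process's map is arranged top-down or
 * bottom-up.
 */
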
vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
{

	if (p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}
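
/*
 * uvm_mmap_dev: map a device into the given process's address space as a
 * shared, read/write mapping.  If *addrp is NULL a default address is
 * chosen, otherwise the mapping is made at *addrp (MAP_FIXED).
 */
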
int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM,
	    uobj, off, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
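
/*
 * uvm_mmap_anon: map anonymous, private, read/write memory into the given
 * process's address space.  If *addrp is NULL a default address is chosen,
 * otherwise the mapping is made at *addrp (MAP_FIXED).
 */
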
int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL,
	    NULL, 0, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}