/*	$NetBSD: uvm_mmap.c,v 1.162.6.1 2017/05/02 03:19:22 pgoyette Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.162.6.1 2017/05/02 03:19:22 pgoyette Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}
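
/*
 * Illustrative sketch, not part of the build: concrete calls showing how
 * range_test() classifies its failure cases.  The function name and the
 * argument values below are hypothetical, and assume a map that neither
 * starts at address zero nor extends to the very top of the address space.
 */
#if 0
static int
range_test_examples(struct vm_map *map)
{
	int error;

	/* extends past the map's maximum: EFBIG for mmap, EINVAL otherwise */
	error = range_test(map, vm_map_max(map) - PAGE_SIZE,
	    2 * PAGE_SIZE, true);

	/* addr + size wraps around zero, so eaddr < addr: EOVERFLOW for mmap */
	error = range_test(map, (vaddr_t)-PAGE_SIZE, 2 * PAGE_SIZE, true);

	return error;
}
#endif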

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return ENOSYS;
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return ENOSYS;
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}
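
/*
 * Illustrative userland sketch, not part of this file: sys_mincore()
 * expects a page-aligned address and fills one status byte per page of
 * the rounded-up length, so the caller sizes "vec" accordingly.  The
 * helper name below is hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>

static int
pages_resident(void *addr, size_t len)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	size_t npgs = (len + pgsz - 1) / pgsz;	/* one status byte per page */
	char *vec = malloc(npgs);
	int n = 0;

	if (vec == NULL || mincore(addr, len, vec) == -1) {
		free(vec);
		return -1;
	}
	for (size_t i = 0; i < npgs; i++)
		if (vec[i])
			n++;			/* page is in core */
	free(vec);
	return n;
}
#endif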

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return EINVAL;

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return ENOMEM;
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {
		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return EINVAL;

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}
	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return EINVAL;

 is_anon:	/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return error;
}
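
/*
 * Illustrative userland sketch, not part of this file: a plain shared
 * file mapping.  With a page-aligned offset the returned pointer is page
 * aligned; as the comment above notes, this native mmap also accepts an
 * unaligned offset, in which case the returned pointer is bumped up by
 * (offset % PAGE_SIZE) into a page-aligned mapping.  The helper name is
 * hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_file_ro(const char *path, size_t len)
{
	int fd = open(path, O_RDONLY);
	void *p;

	if (fd == -1)
		return NULL;
	/* addr == NULL: let the kernel pick via e_vm_default_addr() */
	p = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_SHARED, fd, 0);
	close(fd);				/* the mapping stays valid */
	return (p == MAP_FAILED) ? NULL : p;
}
#endif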

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return EINVAL;
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return ENOMEM;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages within the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return EINVAL;
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}
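
/*
 * Illustrative userland sketch, not part of this file: flag handling
 * mirrors the sanity checks above -- at least one of MS_ASYNC, MS_SYNC
 * or MS_INVALIDATE must be given, MS_ASYNC and MS_SYNC are mutually
 * exclusive, and MS_INVALIDATE alone behaves like MS_SYNC|MS_INVALIDATE.
 * The helper name is hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <stddef.h>
#include <err.h>

static void
flush_mapping(void *p, size_t len)
{
	/* write back a modified shared mapping and wait for the I/O */
	if (msync(p, len, MS_SYNC) == -1)
		err(1, "msync");
	/* MS_ASYNC | MS_SYNC together would fail with EINVAL (see above) */
}
#endif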

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return 0;

	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return EINVAL;

	vm_map_lock(map);
#if 0
	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return EINVAL;
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    false);
	return error;
}
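
/*
 * Illustrative userland sketch, not part of this file: toggling write
 * access on a region.  The protection value is masked with VM_PROT_ALL
 * above, so unknown bits are simply ignored.  The helper name is
 * hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <stddef.h>
#include <err.h>

static void
set_writable(void *p, size_t len, int writable)
{
	int prot = PROT_READ | (writable ? PROT_WRITE : 0);

	if (mprotect(p, len, prot) == -1)
		err(1, "mprotect");
}
#endif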

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */

		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return EINVAL;

	default:
		return EINVAL;
	}

	return error;
}
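
/*
 * Illustrative userland sketch, not part of this file: the advice values
 * map onto the UVM operations selected in the switch above (access-pattern
 * hints, pre-faulting, deactivation, freeing).  The helper name is
 * hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <stddef.h>

static void
advise_region(void *p, size_t len)
{
	(void)madvise(p, len, MADV_SEQUENTIAL);	/* sequential access hint */
	(void)madvise(p, len, MADV_WILLNEED);	/* pre-fault the pages */
	(void)madvise(p, len, MADV_FREE);	/* contents no longer needed */
}
#endif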

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return EAGAIN;

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return EAGAIN;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}
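
/*
 * Illustrative userland sketch, not part of this file: locking a buffer
 * of sensitive data.  As implemented above, requests beyond the global
 * wired-page limit or the RLIMIT_MEMLOCK resource limit fail with EAGAIN.
 * The helper name is hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <string.h>

static int
lock_secret(void *buf, size_t len)
{
	if (mlock(buf, len) == -1)
		return -1;		/* EAGAIN/ENOMEM: limits exceeded */
	/* ... use the locked buffer ... */
	memset(buf, 0, len);		/* scrub before unlocking */
	return munlock(buf, len);
}
#endif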

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error)
		return ENOMEM;

	return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return EINVAL;

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return 0;
}
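
/*
 * Illustrative userland sketch, not part of this file: the MCL_FUTURE
 * path of uvm_map_pageable_all() marks the map with VM_MAP_WIREFUTURE,
 * which makes uvm_mmap() below wire each new mapping as it is created.
 */
#if 0
#include <sys/mman.h>
#include <err.h>

static void
wire_address_space(void)
{
	/* real-time style setup: wire everything, now and in the future */
	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
		err(1, "mlockall");
	/* ... run the time-critical work ... */
	(void)munlockall();
}
#endif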

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return 0;
	if (foff & PAGE_MASK)
		return EINVAL;
	if ((prot & maxprot) != prot)
		return EINVAL;

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return EINVAL;
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that the
	 * alignment is at least a page-sized quantity.  If the request
	 * was for a fixed mapping, make sure the supplied address adheres
	 * to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return EINVAL;
		align = 1L << align;
		if (align < PAGE_SIZE)
			return EINVAL;
		if (align >= vm_map_max(map))
			return ENOMEM;
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return EINVAL;
			align = 0;
		}
	}

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	     curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
	    uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return 0;
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return 0;
	}
	return 0;
}
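
/*
 * Illustrative userland sketch, not part of this file: requesting an
 * aligned anonymous mapping.  NetBSD's MAP_ALIGNED(n) macro encodes the
 * log2 of the alignment into the MAP_ALIGNMENT_MASK bits decoded above;
 * the helper name and the 1 MiB choice are hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <stddef.h>

static void *
alloc_aligned_1m(size_t len)
{
	/* ask for a 1 MiB (2^20 byte) aligned anonymous region */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_ALIGNED(20), -1, 0);

	return (p == MAP_FAILED) ? NULL : p;
}
#endif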

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
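
/*
 * Illustrative kernel-side sketch, not part of this file: a hypothetical
 * driver routine that wants a device's register window and a plain
 * anonymous buffer mapped into a process would go through the helpers
 * above rather than calling uvm_mmap() directly.  All names and the
 * scenario are hypothetical.
 */
#if 0
static int
example_map_into(struct proc *p, dev_t dev, off_t regs_off, size_t len)
{
	void *regs = NULL;	/* NULL: let e_vm_default_addr() pick */
	void *buf = NULL;
	int error;

	error = uvm_mmap_dev(p, &regs, len, dev, regs_off);
	if (error)
		return error;
	return uvm_mmap_anon(p, &buf, len);
}
#endif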