/*	$NetBSD: uvm_mmap.c,v 1.164 2017/05/06 21:34:52 joerg Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.164 2017/05/06 21:34:52 joerg Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return ENOSYS;
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return ENOSYS;
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so that faulting it in while storing the
	 * status bytes cannot render the status we have already
	 * returned stale.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}
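
/*
 * Illustrative userland sketch (not part of this kernel source): how the
 * mincore(2) interface implemented above is typically consumed.  One
 * status byte is written per page of the request; a non-zero byte means
 * the page was resident at the time of the call.  The helper name and
 * error handling below are hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	static int
 *	count_resident_pages(void *addr, size_t len)
 *	{
 *		size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
 *		size_t npgs = (len + pgsz - 1) / pgsz;
 *		char *vec = malloc(npgs);
 *		int n = 0;
 *
 *		if (vec == NULL || mincore(addr, len, vec) == -1) {
 *			free(vec);
 *			return -1;
 *		}
 *		for (size_t i = 0; i < npgs; i++)
 *			if (vec[i] != 0)
 *				n++;
 *		free(vec);
 *		return n;
 *	}
 */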

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot, extraprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot));
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return EINVAL;

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return ENOMEM;
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {
		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return EINVAL;

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}
	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return EINVAL;

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot);
	if (((prot | extraprot) & maxprot) != (prot | extraprot))
		return EACCES;
	if ((error = PAX_MPROTECT_VALIDATE(l, prot)))
		return error;

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return error;
}
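
/*
 * Illustrative userland sketch (not part of this kernel source): the
 * page-offset handling above means a file mapping requested at a file
 * offset that is not page aligned is established from the containing page
 * boundary, and the pointer handed back to the caller is bumped by the
 * offset within that page.  The hypothetical, non-portable fragment below
 * would therefore see "p" point at the byte at file offset 100; portable
 * code should stick to page-aligned offsets as POSIX requires.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *
 *	int fd = open("/some/file", O_RDONLY);
 *	void *p = mmap(NULL, 4096, PROT_READ, MAP_FILE | MAP_SHARED,
 *	    fd, 100);
 */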

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return EINVAL;
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);


	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return ENOMEM;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return EINVAL;
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}
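
/*
 * Illustrative userland sketch (not part of this kernel source): the flag
 * checks above accept at most one of MS_ASYNC or MS_SYNC (MS_SYNC is
 * implied if neither is given), optionally combined with MS_INVALIDATE.
 * A typical synchronous flush of a shared file mapping (buffer names
 * below are hypothetical) would look like:
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	if (msync(buf, buflen, MS_SYNC) == -1)
 *		err(1, "msync");
 *
 * MS_ASYNC merely starts the write-back, and MS_INVALIDATE additionally
 * discards the cached pages after cleaning them.
 */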

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return 0;

	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return EINVAL;

	vm_map_lock(map);
#if 0
	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return EINVAL;
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_protect_user(l, addr, addr + size, prot);
	return error;
}
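
/*
 * Illustrative userland sketch (not part of this kernel source): the
 * requested protection is clipped to VM_PROT_ALL here and then checked
 * against each map entry's maximum protection in uvm_map_protect_user().
 * Under PaX MPROTECT that maximum typically excludes PROT_EXEC unless the
 * extra protection was reserved when the mapping was created, e.g. (names
 * and sizes hypothetical):
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE |
 *	    PROT_MPROTECT(PROT_EXEC), MAP_ANON | MAP_PRIVATE, -1, 0);
 *
 * (generate code into p, then flip it to read/execute:)
 *
 *	if (mprotect(p, len, PROT_READ | PROT_EXEC) == -1)
 *		err(1, "mprotect");
 */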

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return EINVAL;

	default:
		return EINVAL;
	}

	return error;
}
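
/*
 * Illustrative userland sketch (not part of this kernel source): the
 * difference between MADV_DONTNEED and MADV_FREE above matters to
 * allocators.  MADV_DONTNEED only deactivates the pages (their contents
 * survive), while MADV_FREE tells the kernel the contents are disposable,
 * so for anonymous memory a later fault may observe zero-filled pages.
 * A hypothetical free-list release might therefore use:
 *
 *	#include <sys/mman.h>
 *
 *	if (madvise(chunk, chunklen, MADV_FREE) == -1)
 *		madvise(chunk, chunklen, MADV_DONTNEED);
 */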

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return EAGAIN;

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return EAGAIN;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}
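
/*
 * Illustrative userland sketch (not part of this kernel source): the two
 * EAGAIN checks above enforce the global wired-page limit and the
 * per-process RLIMIT_MEMLOCK limit, so callers normally wire only small,
 * latency- or secrecy-critical buffers (names below are hypothetical):
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	if (mlock(keybuf, keylen) == -1)
 *		err(1, "mlock");
 *
 * and drop the wiring again with munlock(keybuf, keylen) once the
 * sensitive data has been wiped.
 */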

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error)
		return ENOMEM;

	return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return EINVAL;

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return 0;
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return 0;
	if (foff & PAGE_MASK)
		return EINVAL;
	if ((prot & maxprot) != prot)
		return EINVAL;

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return EINVAL;
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that
	 * the alignment is at least a page-sized quantity.  If the
	 * request was for a fixed mapping, make sure the supplied address
	 * adheres to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return EINVAL;
		align = 1L << align;
		if (align < PAGE_SIZE)
			return EINVAL;
		if (align >= vm_map_max(map))
			return ENOMEM;
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return EINVAL;
			align = 0;
		}
	}
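
	/*
	 * For reference, the alignment arrives from userland encoded as a
	 * log2 value packed into the flags word via MAP_ALIGNED().  A
	 * hypothetical request for a 2 MB aligned anonymous region would
	 * look like:
	 *
	 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	 *	    MAP_ANON | MAP_PRIVATE | MAP_ALIGNED(21), -1, 0);
	 *
	 * which the shift above turns back into align = 1L << 21.
	 */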

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	    curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
	    uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return 0;
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return 0;
	}
	return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
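
/*
 * Illustrative kernel-side sketch (not part of this file): a framebuffer
 * style driver that wants to hand a chunk of its device space to a process
 * could call uvm_mmap_dev() roughly as follows; the device number, length
 * and offset below are hypothetical:
 *
 *	void *va = NULL;
 *	int error = uvm_mmap_dev(curproc, &va, fb_size, fb_dev, 0);
 *
 * On success "va" holds the user virtual address of the new mapping.
 * Passing a NULL hint lets e_vm_default_addr() pick the address, while a
 * non-NULL *addrp is treated as a MAP_FIXED request.
 */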

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}