/* $NetBSD: uvm_mmap.c,v 1.154 2015/11/26 13:15:34 martin Exp $ */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.154 2015/11/26 13:15:34 martin Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>

#if defined(PAX_ASLR) || defined(PAX_MPROTECT)
#include <sys/pax.h>
#endif /* PAX_ASLR || PAX_MPROTECT */

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(vaddr_t addr, vsize_t size, bool ismmap)
{
        vaddr_t vm_min_address = VM_MIN_ADDRESS;
        vaddr_t vm_max_address = VM_MAXUSER_ADDRESS;
        vaddr_t eaddr = addr + size;
        int res = 0;

        if (addr < vm_min_address)
                return EINVAL;
        if (eaddr > vm_max_address)
                return ismmap ? EFBIG : EINVAL;
        if (addr > eaddr) /* no wrapping! */
                return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
        res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

        return res;
}
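
/*
 * Illustrative numbers for the checks above (not from the original
 * sources; the real bounds are machine-dependent).  Assume a 32-bit
 * port with VM_MIN_ADDRESS == 0x1000 and VM_MAXUSER_ADDRESS ==
 * 0xbfc00000:
 *
 *      range_test(0xbfb00000, 0x200000, true)  -> EFBIG
 *          (eaddr lands past the user address limit)
 *      range_test(0xfffff000, 0x2000, true)    -> EOVERFLOW
 *          (addr + size wraps around zero, so addr > eaddr)
 *      range_test(0x00001000, 0x10000, false)  -> 0, unless an optional
 *          MD_MMAP_RANGE_TEST() hook rejects the machine-dependent range.
 */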

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
        /* {
                syscallarg(intptr_t) incr;
        } */

        return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
        /* {
                syscallarg(int) incr;
        } */

        return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(char *) vec;
        } */
        struct proc *p = l->l_proc;
        struct vm_page *pg;
        char *vec, pgi;
        struct uvm_object *uobj;
        struct vm_amap *amap;
        struct vm_anon *anon;
        struct vm_map_entry *entry;
        vaddr_t start, end, lim;
        struct vm_map *map;
        vsize_t len;
        int error = 0, npgs;

        map = &p->p_vmspace->vm_map;

        start = (vaddr_t)SCARG(uap, addr);
        len = SCARG(uap, len);
        vec = SCARG(uap, vec);

        if (start & PAGE_MASK)
                return (EINVAL);
        len = round_page(len);
        end = start + len;
        if (end <= start)
                return (EINVAL);

        /*
         * Lock down vec, so our returned status isn't outdated by
         * storing the status byte for a page.
         */

        npgs = len >> PAGE_SHIFT;
        error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
        if (error) {
                return error;
        }
        vm_map_lock_read(map);

        if (uvm_map_lookup_entry(map, start, &entry) == false) {
                error = ENOMEM;
                goto out;
        }

        for (/* nothing */;
            entry != &map->header && entry->start < end;
            entry = entry->next) {
                KASSERT(!UVM_ET_ISSUBMAP(entry));
                KASSERT(start >= entry->start);

                /* Make sure there are no holes. */
                if (entry->end < end &&
                    (entry->next == &map->header ||
                    entry->next->start > entry->end)) {
                        error = ENOMEM;
                        goto out;
                }

                lim = end < entry->end ? end : entry->end;

                /*
                 * Special case for objects with no "real" pages.  Those
                 * are always considered resident (mapped devices).
                 */

                if (UVM_ET_ISOBJ(entry)) {
                        KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
                        if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
                                for (/* nothing */; start < lim;
                                    start += PAGE_SIZE, vec++)
                                        subyte(vec, 1);
                                continue;
                        }
                }

                amap = entry->aref.ar_amap;     /* upper layer */
                uobj = entry->object.uvm_obj;   /* lower layer */

                if (amap != NULL)
                        amap_lock(amap);
                if (uobj != NULL)
                        mutex_enter(uobj->vmobjlock);

                for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
                        pgi = 0;
                        if (amap != NULL) {
                                /* Check the upper layer first. */
                                anon = amap_lookup(&entry->aref,
                                    start - entry->start);
                                /* Don't need to lock anon here. */
                                if (anon != NULL && anon->an_page != NULL) {

                                        /*
                                         * Anon has the page for this entry
                                         * offset.
                                         */

                                        pgi = 1;
                                }
                        }
                        if (uobj != NULL && pgi == 0) {
                                /* Check the lower layer. */
                                pg = uvm_pagelookup(uobj,
                                    entry->offset + (start - entry->start));
                                if (pg != NULL) {

                                        /*
                                         * Object has the page for this entry
                                         * offset.
                                         */

                                        pgi = 1;
                                }
                        }
                        (void) subyte(vec, pgi);
                }
                if (uobj != NULL)
                        mutex_exit(uobj->vmobjlock);
                if (amap != NULL)
                        amap_unlock(amap);
        }

 out:
        vm_map_unlock_read(map);
        uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
        return (error);
}
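
/*
 * Illustrative userland view of the interface above (a sketch, not
 * part of the kernel sources): the caller supplies one status byte
 * per page of a page-aligned range, and each byte is set nonzero
 * when the corresponding page is resident.
 *
 *      size_t pgsz = sysconf(_SC_PAGESIZE);
 *      char vec[16];
 *      void *p = mmap(NULL, 16 * pgsz, PROT_READ | PROT_WRITE,
 *          MAP_ANON | MAP_PRIVATE, -1, 0);
 *      if (mincore(p, 16 * pgsz, vec) == 0 && (vec[0] & 1))
 *              printf("first page resident\n");
 */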

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) prot;
                syscallarg(int) flags;
                syscallarg(int) fd;
                syscallarg(long) pad;
                syscallarg(off_t) pos;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        off_t pos;
        vsize_t size, pageoff, newsize;
        vm_prot_t prot, maxprot;
        int flags, fd, advice;
        vaddr_t defaddr;
        struct file *fp = NULL;
        struct uvm_object *uobj;
        int error;
#ifdef PAX_ASLR
        vaddr_t orig_addr;
#endif /* PAX_ASLR */

        /*
         * first, extract syscall args from the uap.
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        prot = SCARG(uap, prot) & VM_PROT_ALL;
        flags = SCARG(uap, flags);
        fd = SCARG(uap, fd);
        pos = SCARG(uap, pos);

#ifdef PAX_ASLR
        orig_addr = addr;
#endif /* PAX_ASLR */

        /*
         * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
         * validate the flags.
         */
        if (flags & MAP_COPY) {
                flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
#if defined(COMPAT_10) && defined(__i386__)
                /*
                 * Ancient kernels did not obey PROT_EXEC (on i386 at
                 * least), and ld.so did not turn it on.  We take care
                 * of this for amd64 in compat32.
                 */
                prot |= PROT_EXEC;
#endif
        }
        if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
                return (EINVAL);

        /*
         * align file position and save offset.  adjust size.
         */

        pageoff = (pos & PAGE_MASK);
        pos -= pageoff;
        newsize = size + pageoff;               /* add offset */
        newsize = (vsize_t)round_page(newsize); /* round up */

        if (newsize < size)
                return (ENOMEM);
        size = newsize;

        /*
         * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
         */
        if (flags & MAP_FIXED) {

                /* ensure address and file offset are aligned properly */
                addr -= pageoff;
                if (addr & PAGE_MASK)
                        return (EINVAL);

                error = range_test(addr, size, true);
                if (error) {
                        return error;
                }

        } else if (addr == 0 || !(flags & MAP_TRYFIXED)) {

                /*
                 * not fixed: make sure we skip over the largest
                 * possible heap for non-topdown mapping arrangements.
                 * we will refine our guess later (e.g. to account for
                 * VAC, etc)
                 */

                defaddr = p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, size,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

                if (addr == 0 ||
                    !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
                        addr = MAX(addr, defaddr);
                else
                        addr = MIN(addr, defaddr);
        }

        /*
         * check for file mappings (i.e. not anonymous) and verify file.
         */

        advice = UVM_ADV_NORMAL;
        if ((flags & MAP_ANON) == 0) {
                if ((fp = fd_getfile(fd)) == NULL)
                        return (EBADF);

                if (fp->f_ops->fo_mmap == NULL) {
                        error = ENODEV;
                        goto out;
                }
                error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
                    &advice, &uobj, &maxprot);
                if (error) {
                        goto out;
                }
                if (uobj == NULL) {
                        flags |= MAP_ANON;
                        fd_putfile(fd);
                        fp = NULL;
                        goto is_anon;
                }
        } else {                /* MAP_ANON case */
                /*
                 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
                 */
                if (fd != -1)
                        return (EINVAL);

 is_anon:               /* label for SunOS style /dev/zero */
                uobj = NULL;
                maxprot = VM_PROT_ALL;
                pos = 0;
        }

#ifdef PAX_MPROTECT
        pax_mprotect(l, &prot, &maxprot);
#endif /* PAX_MPROTECT */

#ifdef PAX_ASLR
        pax_aslr_mmap(l, &addr, orig_addr, flags);
#endif /* PAX_ASLR */

        /*
         * now let kernel internal function uvm_mmap do the work.
         */

        error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
            flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

        /* remember to add offset */
        *retval = (register_t)(addr + pageoff);

 out:
        if (fp != NULL)
                fd_putfile(fd);

        return (error);
}
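
/*
 * Worked example of the offset handling above (illustrative values,
 * assuming a 4 KB PAGE_SIZE): a MAP_FIXED request with addr = 0x20001234
 * and pos = 0x1234 gives pageoff = 0x234, so the mapping is established
 * at 0x20001000 with the file offset rounded down to 0x1000, the size
 * grown by 0x234 (and then rounded up to a whole page), and 0x20001234
 * returned to the caller.  If addr and pos had different remainders
 * modulo PAGE_SIZE, the "addr -= pageoff" step would leave addr
 * misaligned and the call would fail with EINVAL.
 */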

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) flags;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        struct vm_map *map;
        int error, rv, flags, uvmflags;

        /*
         * extract syscall args from the uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        flags = SCARG(uap, flags);

        /* sanity check flags */
        if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
            (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
            (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
                return (EINVAL);
        if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
                flags |= MS_SYNC;

        /*
         * align the address to a page boundary and adjust the size accordingly.
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(addr, size, false);
        if (error)
                return error;

        /*
         * get map
         */

        map = &p->p_vmspace->vm_map;

        /*
         * XXXCDC: do we really need this semantic?
         *
         * XXX Gak!  If size is zero we are supposed to sync "all modified
         * pages with the region containing addr".  Unfortunately, we
         * don't really keep track of individual mmaps so we approximate
         * by flushing the range of the map entry containing addr.
         * This can be incorrect if the region splits or is coalesced
         * with a neighbor.
         */

        if (size == 0) {
                struct vm_map_entry *entry;

                vm_map_lock_read(map);
                rv = uvm_map_lookup_entry(map, addr, &entry);
                if (rv == true) {
                        addr = entry->start;
                        size = entry->end - entry->start;
                }
                vm_map_unlock_read(map);
                if (rv == false)
                        return (EINVAL);
        }

        /*
         * translate MS_ flags into PGO_ flags
         */

        uvmflags = PGO_CLEANIT;
        if (flags & MS_INVALIDATE)
                uvmflags |= PGO_FREE;
        if (flags & MS_SYNC)
                uvmflags |= PGO_SYNCIO;

        error = uvm_map_clean(map, addr, addr+size, uvmflags);
        return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        struct vm_map *map;
        struct vm_map_entry *dead_entries;
        int error;

        /*
         * get syscall args.
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        /*
         * align the address to a page boundary and adjust the size accordingly.
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        if (size == 0)
                return (0);

        error = range_test(addr, size, false);
        if (error)
                return error;

        map = &p->p_vmspace->vm_map;

        /*
         * interesting system call semantic: make sure entire range is
         * allocated before allowing an unmap.
         */

        vm_map_lock(map);
#if 0
        if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
                vm_map_unlock(map);
                return (EINVAL);
        }
#endif
        uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
        vm_map_unlock(map);
        if (dead_entries != NULL)
                uvm_unmap_detach(dead_entries, 0);
        return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) prot;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        vm_prot_t prot;
        int error;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        prot = SCARG(uap, prot) & VM_PROT_ALL;

        /*
         * align the address to a page boundary and adjust the size accordingly.
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = round_page(size);

        error = range_test(addr, size, false);
        if (error)
                return error;

        error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
            false);
        return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(int) len;
                syscallarg(int) inherit;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        vm_inherit_t inherit;
        int error;

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        inherit = SCARG(uap, inherit);

        /*
         * align the address to a page boundary and adjust the size accordingly.
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(addr, size, false);
        if (error)
                return error;

        error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
            inherit);
        return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) behav;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        int advice, error;

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        advice = SCARG(uap, behav);

        /*
         * align the address to a page boundary, and adjust the size accordingly
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(addr, size, false);
        if (error)
                return error;

        switch (advice) {
        case MADV_NORMAL:
        case MADV_RANDOM:
        case MADV_SEQUENTIAL:
                error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
                    advice);
                break;

        case MADV_WILLNEED:

                /*
                 * Activate all these pages, pre-faulting them in if
                 * necessary.
                 */
                error = uvm_map_willneed(&p->p_vmspace->vm_map,
                    addr, addr + size);
                break;

        case MADV_DONTNEED:

                /*
                 * Deactivate all these pages.  We don't need them
                 * any more.  We don't, however, toss the data in
                 * the pages.
                 */

                error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
                    PGO_DEACTIVATE);
                break;

        case MADV_FREE:

                /*
                 * These pages contain no valid data, and may be
                 * garbage-collected.  Toss all resources, including
                 * any swap space in use.
                 */

                error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
                    PGO_FREE);
                break;

        case MADV_SPACEAVAIL:

                /*
                 * XXXMRG What is this?  I think it's:
                 *
                 *      Ensure that we have allocated backing-store
                 *      for these pages.
                 *
                 * This is going to require changes to the page daemon,
                 * as it will free swap space allocated to pages in core.
                 * There's also what to do for device/file/anonymous memory.
                 */

                return (EINVAL);

        default:
                return (EINVAL);
        }

        return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
        /* {
                syscallarg(const void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        int error;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        /*
         * align the address to a page boundary and adjust the size accordingly
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(addr, size, false);
        if (error)
                return error;

        if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
                return (EAGAIN);

        if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
                return (EAGAIN);

        error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
            0);
        if (error == EFAULT)
                error = ENOMEM;
        return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(const void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        int error;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        /*
         * align the address to a page boundary, and adjust the size accordingly
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(addr, size, false);
        if (error)
                return error;

        error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
            0);
        if (error == EFAULT)
                error = ENOMEM;
        return error;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(int) flags;
        } */
        struct proc *p = l->l_proc;
        int error, flags;

        flags = SCARG(uap, flags);

        if (flags == 0 ||
            (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
                return (EINVAL);

        error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
        struct proc *p = l->l_proc;

        (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
        return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
        vaddr_t align = 0;
        int error;
        uvm_flag_t uvmflag = 0;

        /*
         * check params
         */

        if (size == 0)
                return(0);
        if (foff & PAGE_MASK)
                return(EINVAL);
        if ((prot & maxprot) != prot)
                return(EINVAL);

        /*
         * for non-fixed mappings, round off the suggested address.
         * for fixed mappings, check alignment and zap old mappings.
         */

        if ((flags & MAP_FIXED) == 0) {
                *addr = round_page(*addr);
        } else {
                if (*addr & PAGE_MASK)
                        return(EINVAL);
                uvmflag |= UVM_FLAG_FIXED;
                (void) uvm_unmap(map, *addr, *addr + size);
        }

        /*
         * Try to see if any requested alignment can even be attempted.
         * Make sure we can express the alignment (asking for a >= 4GB
         * alignment on an ILP32 architecture makes no sense) and that
         * the alignment is at least a page-sized quantity.  If the
         * request was for a fixed mapping, make sure the supplied
         * address adheres to the requested alignment.
         */
        align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
        if (align) {
                if (align >= sizeof(vaddr_t) * NBBY)
                        return(EINVAL);
                align = 1L << align;
                if (align < PAGE_SIZE)
                        return(EINVAL);
                if (align >= vm_map_max(map))
                        return(ENOMEM);
                if (flags & MAP_FIXED) {
                        if ((*addr & (align-1)) != 0)
                                return(EINVAL);
                        align = 0;
                }
        }
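
        /*
         * Illustrative example of the alignment encoding (not from the
         * original sources): with the MAP_ALIGNED(n) encoding from
         * <sys/mman.h>, a caller asking for a 64 KB-aligned mapping
         * passes MAP_ALIGNED(16) in its flags, so the shift above
         * extracts align = 16 and "1L << align" turns it into the byte
         * alignment 0x10000.  That value is handed to uvm_map() below
         * as the alignment hint; for MAP_FIXED requests the supplied
         * address merely has to satisfy it, and align is reset to 0.
         */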

        /*
         * check resource limits
         */

        if (!VM_MAP_IS_KERNEL(map) &&
            (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
            curproc->p_rlimit[RLIMIT_AS].rlim_cur))
                return ENOMEM;

        /*
         * handle anon vs. non-anon mappings.   for non-anon mappings attach
         * to underlying vm object.
         */

        if (flags & MAP_ANON) {
                KASSERT(uobj == NULL);
                foff = UVM_UNKNOWN_OFFSET;
                if ((flags & MAP_SHARED) == 0)
                        /* XXX: defer amap create */
                        uvmflag |= UVM_FLAG_COPYONW;
                else
                        /* shared: create amap now */
                        uvmflag |= UVM_FLAG_OVERLAY;

        } else {
                KASSERT(uobj != NULL);
                if ((flags & MAP_SHARED) == 0) {
                        uvmflag |= UVM_FLAG_COPYONW;
                }
        }

        uvmflag = UVM_MAPFLAG(prot, maxprot,
            (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
            advice, uvmflag);
        error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
        if (error) {
                if (uobj)
                        uobj->pgops->pgo_detach(uobj);
                return error;
        }

        /*
         * POSIX 1003.1b -- if our address space was configured
         * to lock all future mappings, wire the one we just made.
         *
         * Also handle the MAP_WIRED flag here.
         */

        if (prot == VM_PROT_NONE) {

                /*
                 * No more work to do in this case.
                 */

                return (0);
        }
        if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
                vm_map_lock(map);
                if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
                    (locklimit != 0 &&
                    size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
                    locklimit)) {
                        vm_map_unlock(map);
                        uvm_unmap(map, *addr, *addr + size);
                        return ENOMEM;
                }

                /*
                 * uvm_map_pageable() always returns the map unlocked.
                 */

                error = uvm_map_pageable(map, *addr, *addr + size,
                    false, UVM_LK_ENTER);
                if (error) {
                        uvm_unmap(map, *addr, *addr + size);
                        return error;
                }
                return (0);
        }
        return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

        if (topdown)
                return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
        else
                return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
        struct uvm_object *uobj;
        int error, flags, prot;

        flags = MAP_SHARED;
        prot = VM_PROT_READ | VM_PROT_WRITE;
        if (*addrp)
                flags |= MAP_FIXED;
        else
                *addrp = (void *)p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, len,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

        uobj = udv_attach(dev, prot, off, len);
        if (uobj == NULL)
                return EINVAL;

        error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
            (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM,
            uobj, off, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return error;
}
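
/*
 * Illustrative use of uvm_mmap_dev() (a sketch, not taken from any
 * particular driver; "regwin_size" and "self_dev" are placeholders
 * the driver would supply): to hand a shared, read/write device
 * window to the current process,
 *
 *      void *va = NULL;
 *      int error = uvm_mmap_dev(curproc, &va, regwin_size, self_dev, 0);
 *
 * A NULL *addrp on entry lets the default-address logic above pick
 * the user virtual address, which is returned in *addrp; a non-NULL
 * *addrp is treated as a MAP_FIXED request instead.
 */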

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
        int error, flags, prot;

        flags = MAP_PRIVATE | MAP_ANON;
        prot = VM_PROT_READ | VM_PROT_WRITE;
        if (*addrp)
                flags |= MAP_FIXED;
        else
                *addrp = (void *)p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, len,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

        error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
            (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL,
            NULL, 0, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return error;
}
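
/*
 * Illustrative use of uvm_mmap_anon() (a sketch, not taken from the
 * original sources): to give the current process a private,
 * zero-filled, read/write region of four pages at an address of
 * UVM's choosing,
 *
 *      void *va = NULL;
 *      int error = uvm_mmap_anon(curproc, &va, 4 * PAGE_SIZE);
 *
 * As with uvm_mmap_dev(), passing a non-NULL *addrp instead requests
 * a MAP_FIXED mapping at that address.
 */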