/*	$NetBSD: uvm_mmap.c,v 1.22 1999/06/15 23:27:47 thorpej Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *      Washington University, University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>


/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */
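
/*
 * Implementation note: the loop below walks the map entries covering
 * [addr, addr + len) and stores one status byte per page into the
 * user's "vec" array via subyte() (1 = resident, 0 = not resident).
 * Device mappings are always reported resident; otherwise the anon
 * (amap) layer is checked first, then the backing uvm_object.
 */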

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mincore_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
	vm_page_t m;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	vm_map_entry_t entry;
	vaddr_t start, end, lim;
	vm_map_t map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	npgs = len >> PAGE_SHIFT;

	if (uvm_useracc(vec, npgs, B_WRITE) == FALSE)
		return (EFAULT);

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	uvm_vslock(p, vec, npgs, VM_PROT_WRITE);

	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
#ifdef DIAGNOSTIC
		if (UVM_ET_ISSUBMAP(entry))
			panic("mincore: user map has submap");
		if (start < entry->start)
			panic("mincore: hole");
#endif
		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for mapped devices; these are always
		 * considered resident.
		 */
		if (UVM_ET_ISOBJ(entry)) {
			extern struct uvm_pagerops uvm_deviceops;	/* XXX */
#ifdef DIAGNOSTIC
			if (UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj))
				panic("mincore: user map has kernel object");
#endif
			if (entry->object.uvm_obj->pgops == &uvm_deviceops) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* top layer */
		uobj = entry->object.uvm_obj;	/* bottom layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			simple_lock(&uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the top layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->u.an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			if (uobj != NULL && pgi == 0) {
				/* Check the bottom layer. */
				m = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (m != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			(void) subyte(vec, pgi);
		}

		if (uobj != NULL)
			simple_unlock(&uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p, SCARG(uap, vec), npgs);
	return (error);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?  could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */
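
/*
 * Illustrative example (assumes 4 KB pages): a non-fixed mmap() of
 * len 0x100 at file offset 0x1234 computes pageoff = 0x234, maps one
 * page starting at file offset 0x1000, and returns the chosen address
 * plus 0x234, so the returned pointer refers to the byte at file
 * offset 0x1234.
 */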

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the newsize fits within a vaddr_t
	 * XXX: need to revise addressing data types
	 */
	if (pos + size > (vaddr_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%lx too big\n", (long long)pos,
		    (long)size);
#endif
		return (EINVAL);
	}

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	size += pageoff;			/* add offset */
	size = (vsize_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);		/* don't allow wrap */

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);	/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return(EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return(EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);	/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK)
			return (ENODEV);  /* only REG/CHR/BLK support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable or append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			}
			else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/* MAP_PRIVATE mappings can always be written to */
			maxprot |= VM_PROT_WRITE;
		}

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */

		if (fd != -1)
			return (EINVAL);

 is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
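	/*
	 * (That is: with a zero length the code below substitutes the
	 *  start and size of the single map entry containing addr.)
	 */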
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	/* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return(0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;


	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	/*
	 * doit!
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int rv;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);
	if ((int)size < 0)
		return (EINVAL);

	/*
	 * doit
	 */

	rv = uvm_map_protect(&p->p_vmspace->vm_map,
	    addr, addr+size, prot, FALSE);

	if (rv == KERN_SUCCESS)
		return (0);
	if (rv == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_minherit_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);
	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);

	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);

	switch (uvm_map_advice(&p->p_vmspace->vm_map, addr, addr+size,
	    advice)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif
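
	/*
	 * Resource checks above: when the pmap layer can report a wired
	 * page count, locking is limited by the process's RLIMIT_MEMLOCK;
	 * otherwise only the superuser may lock memory.  The actual wiring
	 * is done by uvm_map_pageable() below.
	 */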

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlockall_args /* {
		syscallarg(int) flags;
	} */ *uap = v;
	vsize_t limit;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

#ifdef pmap_wired_count
	/* Actually checked in uvm_map_pageable_all() */
	limit = p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur;
#else
	limit = 0;
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags, limit);
	switch (error) {
	case KERN_SUCCESS:
		error = 0;
		break;

	case KERN_NO_SPACE:	/* XXX overloaded */
		error = ENOMEM;
		break;

	default:
		/*
		 * "Some or all of the memory could not be locked when
		 * the call was made."
		 */
		error = EAGAIN;
	}

	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
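
/*
 * For illustration: a MAP_ANON|MAP_PRIVATE request from sys_mmap()
 * arrives here roughly as
 *
 *	uvm_mmap(&p->p_vmspace->vm_map, &addr, size,
 *	    VM_PROT_READ|VM_PROT_WRITE, VM_PROT_ALL,
 *	    MAP_ANON|MAP_PRIVATE, NULL, 0);
 *
 * which takes the "uobj = NULL" branch below and sets UVM_FLAG_COPYONW.
 */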

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vaddr_t *addr;
	vsize_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vaddr_t foff;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {

		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] that we
			 * allocate buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE), foff, size);
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return(0);

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return(ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return(EACCES);
	}
	return(EINVAL);
}