/*	$NetBSD: uvm_mmap.c,v 1.9 1998/05/10 12:35:59 mrg Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *      Washington University, University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?  could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, the offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if the address isn't page aligned the mapping starts at
 *      trunc_page(addr) and the return value is adjusted up by the
 *      page offset.
 */
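
/*
 * Illustrative userland sketch of the rules above (not kernel code;
 * hypothetical values, assuming a 4096-byte page size and an open file
 * descriptor fd).  With an offset of 0x2003, pageoff is 3, the file is
 * mapped from offset 0x2000, and the returned pointer is 3 bytes past
 * the page boundary so that it addresses file offset 0x2003 directly.
 */
#if 0
	char *p;

	p = mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 0x2003);
	if (p == MAP_FAILED)
		err(1, "mmap");
	/* p[0] is the byte at file offset 0x2003; the mapping itself
	   starts at p - 3 (file offset 0x2000). */
#endif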

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vm_offset_t addr;
	struct vattr va;
	off_t pos;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the end of the mapping (pos + size) fits within
	 * a vm_offset_t.
	 * XXX: need to revise addressing data types
	 */
	if (pos + size > (vm_offset_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%x too big\n", pos, (int)size);
#endif
		return (EINVAL);
	}

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos  -= pageoff;
	size += pageoff;			/* add offset */
	size = (vm_size_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);		/* don't allow wrap */
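
	/*
	 * e.g. (hypothetical numbers, 4096-byte pages): pos = 0x2003 and
	 * size = 100 become pageoff = 3, pos = 0x2000, and
	 * size = round_page(103) = 0x1000.
	 */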

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);	/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}
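
	/*
	 * e.g. on a platform where MAXDSIZ is 64MB, any hint below
	 * vm_daddr + 64MB is bumped up past it, so a later brk() growing
	 * the data segment cannot collide with the new mapping.
	 */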

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return (EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return (EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);	/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (ENODEV);	/* only REG/CHR support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable or append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			}
			else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/*
			 * MAP_PRIVATE mappings can always be written to:
			 * writes go to a private copy, never back to the file.
			 */
			maxprot |= VM_PROT_WRITE;
		}
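
		/*
		 * example of the above (hypothetical): a file opened
		 * O_RDONLY yields maxprot = READ|EXECUTE, so asking for
		 * PROT_WRITE on a MAP_SHARED mapping of it fails with
		 * EACCES, while the same request on a MAP_PRIVATE mapping
		 * succeeds because the writes only touch the private copy.
		 */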

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */

		if (fd != -1)
			return (EINVAL);

 is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;
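
	/*
	 * e.g.: a zero flags word and MS_SYNC|MS_ASYNC are both rejected
	 * above, while MS_INVALIDATE alone passes the sanity check and is
	 * defaulted to MS_SYNC here.
	 */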

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	/* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}
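
/*
 * hypothetical userland use of the call above (sketch only; assumes a
 * region previously mapped at addr for len bytes): write the dirty pages
 * back synchronously and drop the cached copies.
 */
#if 0
	if (msync(addr, len, MS_SYNC | MS_INVALIDATE) == -1)
		err(1, "msync");
#endif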

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;

	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	/*
	 * doit!
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, 0, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}
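
/*
 * note on the semantic above (illustrative, hypothetical addresses):
 * because the entire range must be allocated, asking to unmap
 * [a, a + 2*PAGE_SIZE) when only [a, a + PAGE_SIZE) is mapped fails
 * with EINVAL rather than unmapping the first page.
 */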

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;
	int rv;

	/*
	 * extract syscall args from uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly.
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if ((int)size < 0)
		return (EINVAL);

	/*
	 * doit
	 */

	rv = uvm_map_protect(&p->p_vmspace->vm_map,
	    addr, addr+size, prot, FALSE);

	if (rv == KERN_SUCCESS)
		return (0);
	if (rv == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}
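
/*
 * hypothetical userland sketch of the call above (assumes page points at
 * a previously mapped page): make the page read-only, so a later store
 * to it faults.
 */
#if 0
	if (mprotect(page, PAGE_SIZE, PROT_READ) == -1)
		err(1, "mprotect");
#endif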

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_minherit_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);

	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
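
/*
 * note on inheritance (illustrative): a region marked "share" is mapped
 * into a child created by a later fork() so that parent and child see
 * the same pages, a "copy" region is copied copy-on-write, and a "none"
 * region does not appear in the child at all.
 */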

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size
	 * accordingly.
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
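
/*
 * e.g. (hypothetical numbers): with RLIMIT_MEMLOCK at 1MB and 4096-byte
 * pages, a process that already has 200 pages wired can lock at most 56
 * more pages before the rlimit check above returns EAGAIN (on pmaps that
 * provide pmap_wired_count; elsewhere the caller must be the superuser).
 */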

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly.
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
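
/*
 * sketch of a kernel-internal call (hypothetical, not taken from this
 * file): map one page of anonymous copy-on-write memory into a process's
 * map at a kernel-chosen address.
 */
#if 0
	vm_offset_t va = 0;
	int error;

	error = uvm_mmap(&p->p_vmspace->vm_map, &va, PAGE_SIZE,
	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL,
	    MAP_ANON | MAP_PRIVATE, NULL, 0);
#endif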

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vm_offset_t *addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vm_offset_t foff;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return (0);
	if (foff & PAGE_MASK)
		return (EINVAL);
	if ((prot & maxprot) != prot)
		return (EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {

		if (*addr & PAGE_MASK)
			return (EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size, 0);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to the underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] that we
			 * allocate buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return ((vp->v_type == VCHR) ? EINVAL : ENOMEM);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);
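
	/*
	 * e.g. a MAP_SHARED file mapping composes its flags as
	 * UVM_MAPFLAG(prot, maxprot, UVM_INH_SHARE, UVM_ADV_NORMAL, 0),
	 * while a private one gets UVM_INH_COPY with UVM_FLAG_COPYONW
	 * already set above.
	 */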

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return (0);

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}