uvm_mmap.c revision 1.20 1 /* $NetBSD: uvm_mmap.c,v 1.20 1999/05/03 09:08:28 mrg Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993 The Regents of the University of California.
6 * Copyright (c) 1988 University of Utah.
7 *
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * the Systems Programming Group of the University of Utah Computer
12 * Science Department.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the Charles D. Cranor,
25 * Washington University, University of California, Berkeley and
26 * its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
44 * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
45 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
46 */
47
48 /*
49 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
50 * function.
51 */
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/file.h>
55 #include <sys/filedesc.h>
56 #include <sys/resourcevar.h>
57 #include <sys/mman.h>
58 #include <sys/mount.h>
59 #include <sys/proc.h>
60 #include <sys/malloc.h>
61 #include <sys/vnode.h>
62 #include <sys/conf.h>
63 #include <sys/stat.h>
64
65 #include <miscfs/specfs/specdev.h>
66
67 #include <vm/vm.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_kern.h>
70
71 #include <sys/syscallargs.h>
72
73 #include <uvm/uvm.h>
74 #include <uvm/uvm_device.h>
75 #include <uvm/uvm_vnode.h>
76
77
78 /*
79 * unimplemented VM system calls:
80 */
81
82 /*
83 * sys_sbrk: sbrk system call.
84 */
85
86 /* ARGSUSED */
87 int
88 sys_sbrk(p, v, retval)
89 struct proc *p;
90 void *v;
91 register_t *retval;
92 {
93 #if 0
94 struct sys_sbrk_args /* {
95 syscallarg(int) incr;
96 } */ *uap = v;
97 #endif
98
99 return (ENOSYS);
100 }
101
102 /*
103 * sys_sstk: sstk system call.
104 */
105
106 /* ARGSUSED */
107 int
108 sys_sstk(p, v, retval)
109 struct proc *p;
110 void *v;
111 register_t *retval;
112 {
113 #if 0
114 struct sys_sstk_args /* {
115 syscallarg(int) incr;
116 } */ *uap = v;
117 #endif
118
119 return (ENOSYS);
120 }
121
122 /*
123 * sys_madvise: give advice about memory usage.
124 */
125
126 /* ARGSUSED */
127 int
128 sys_madvise(p, v, retval)
129 struct proc *p;
130 void *v;
131 register_t *retval;
132 {
133 #if 0
134 struct sys_madvise_args /* {
135 syscallarg(caddr_t) addr;
136 syscallarg(size_t) len;
137 syscallarg(int) behav;
138 } */ *uap = v;
139 #endif
140
141 return (ENOSYS);
142 }
143
144 /*
145 * sys_mincore: determine if pages are in core or not.
146 */
147
148 /* ARGSUSED */
149 int
150 sys_mincore(p, v, retval)
151 struct proc *p;
152 void *v;
153 register_t *retval;
154 {
155 #if 0
156 struct sys_mincore_args /* {
157 syscallarg(caddr_t) addr;
158 syscallarg(size_t) len;
159 syscallarg(char *) vec;
160 } */ *uap = v;
161 #endif
162
163 return (ENOSYS);
164 }
165
#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * => currently compiled out (dead code); nothing clears UF_MAPPED this way.
 * XXX: is this actually a useful function?   could it be useful?
 */

void
munmapfd(p, fd)
        struct proc *p;
        int fd;
{

        /*
         * XXX should vm_deallocate any regions mapped to this file
         * (as written, only the per-fd UF_MAPPED flag is cleared;
         * the mappings themselves are left alone)
         */
        p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif
185
186 /*
187 * sys_mmap: mmap system call.
188 *
189  * => file offset and address may not be page aligned
190 * - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE
191 * - if address isn't page aligned the mapping starts at trunc_page(addr)
192 * and the return value is adjusted up by the page offset.
193 */
194
int
sys_mmap(p, v, retval)
        struct proc *p;
        void *v;
        register_t *retval;
{
        register struct sys_mmap_args /* {
                syscallarg(caddr_t) addr;
                syscallarg(size_t) len;
                syscallarg(int) prot;
                syscallarg(int) flags;
                syscallarg(int) fd;
                syscallarg(long) pad;
                syscallarg(off_t) pos;
        } */ *uap = v;
        vaddr_t addr;
        struct vattr va;                /* file attrs, for IMMUTABLE/APPEND */
        off_t pos;                      /* file offset (page-aligned below) */
        vsize_t size, pageoff;          /* pageoff = addr/pos offset in page */
        vm_prot_t prot, maxprot;        /* requested and maximum protection */
        int flags, fd;
        vaddr_t vm_min_address = VM_MIN_ADDRESS;
        register struct filedesc *fdp = p->p_fd;
        register struct file *fp;
        struct vnode *vp;
        caddr_t handle;                 /* vnode pointer, or NULL for anon */
        int error;

        /*
         * first, extract syscall args from the uap.
         */

        addr = (vaddr_t) SCARG(uap, addr);
        size = (vsize_t) SCARG(uap, len);
        prot = SCARG(uap, prot) & VM_PROT_ALL;
        flags = SCARG(uap, flags);
        fd = SCARG(uap, fd);
        pos = SCARG(uap, pos);

        /*
         * make sure that the newsize fits within a vaddr_t
         * XXX: need to revise addressing data types
         * NOTE(review): pos is a signed off_t used in an unsigned
         * comparison here; confirm a negative pos cannot slip through.
         */
        if (pos + size > (vaddr_t)-PAGE_SIZE) {
#ifdef DEBUG
                printf("mmap: pos=%qx, size=%lx too big\n", (long long)pos,
                    (long)size);
#endif
                return (EINVAL);
        }

        /*
         * align file position and save offset.  adjust size.
         * (the page offset is added back into the returned address below)
         */

        pageoff = (pos & PAGE_MASK);
        pos -= pageoff;
        size += pageoff;                        /* add offset */
        size = (vsize_t) round_page(size);      /* round up */
        if ((ssize_t) size < 0)
                return (EINVAL);                /* don't allow wrap */

        /*
         * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
         */

        if (flags & MAP_FIXED) {

                /*
                 * ensure address and file offset are aligned properly:
                 * addr and pos must share the same in-page offset
                 */
                addr -= pageoff;
                if (addr & PAGE_MASK)
                        return (EINVAL);

                if (VM_MAXUSER_ADDRESS > 0 &&
                    (addr + size) > VM_MAXUSER_ADDRESS)
                        return (EINVAL);
                if (vm_min_address > 0 && addr < vm_min_address)
                        return (EINVAL);
                if (addr > addr + size)
                        return (EINVAL);        /* no wrapping! */

        } else {

                /*
                 * not fixed: make sure we skip over the largest possible heap.
                 * we will refine our guess later (e.g. to account for VAC, etc)
                 */
                if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
                        addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
        }

        /*
         * check for file mappings (i.e. not anonymous) and verify file.
         */

        if ((flags & MAP_ANON) == 0) {

                if (fd < 0 || fd >= fdp->fd_nfiles)
                        return(EBADF);          /* failed range check? */
                fp = fdp->fd_ofiles[fd];        /* convert to file pointer */
                if (fp == NULL)
                        return(EBADF);

                if (fp->f_type != DTYPE_VNODE)
                        return (ENODEV);        /* only mmap vnodes! */
                vp = (struct vnode *)fp->f_data;        /* convert to vnode */

                if (vp->v_type != VREG && vp->v_type != VCHR &&
                    vp->v_type != VBLK)
                        return (ENODEV); /* only REG/CHR/BLK support mmap */

                /* special case: catch SunOS style /dev/zero */
                if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
                        flags |= MAP_ANON;
                        goto is_anon;
                }

                /*
                 * Old programs may not select a specific sharing type, so
                 * default to an appropriate one.
                 *
                 * XXX: how does MAP_ANON fit in the picture?
                 */
                if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
                        printf("WARNING: defaulted mmap() share type to "
                            "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
                            "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
                            p->p_comm);
#endif
                        if (vp->v_type == VCHR)
                                flags |= MAP_SHARED;    /* for a device */
                        else
                                flags |= MAP_PRIVATE;   /* for a file */
                }

                /*
                 * MAP_PRIVATE device mappings don't make sense (and aren't
                 * supported anyway).  However, some programs rely on this,
                 * so just change it to MAP_SHARED.
                 */
                if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
                        printf("WARNING: converted MAP_PRIVATE device mapping "
                            "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
                            p->p_comm);
#endif
                        flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
                }

                /*
                 * now check protection: build up maxprot from the file's
                 * open mode, rejecting requests the descriptor can't honor
                 */

                maxprot = VM_PROT_EXECUTE;

                /* check read access */
                if (fp->f_flag & FREAD)
                        maxprot |= VM_PROT_READ;
                else if (prot & PROT_READ)
                        return (EACCES);

                /* check write access, shared case first */
                if (flags & MAP_SHARED) {
                        /*
                         * if the file is writable, only add PROT_WRITE to
                         * maxprot if the file is not immutable, append-only.
                         * otherwise, if we have asked for PROT_WRITE, return
                         * EPERM.
                         */
                        if (fp->f_flag & FWRITE) {
                                if ((error =
                                    VOP_GETATTR(vp, &va, p->p_ucred, p)))
                                        return (error);
                                if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
                                        maxprot |= VM_PROT_WRITE;
                                else if (prot & PROT_WRITE)
                                        return (EPERM);
                        }
                        else if (prot & PROT_WRITE)
                                return (EACCES);
                } else {
                        /* MAP_PRIVATE mappings can always write to */
                        maxprot |= VM_PROT_WRITE;
                }

                /*
                 * set handle to vnode
                 */

                handle = (caddr_t)vp;

        } else {                /* MAP_ANON case */

                if (fd != -1)
                        return (EINVAL);

 is_anon:               /* label for SunOS style /dev/zero */
                handle = NULL;
                maxprot = VM_PROT_ALL;
                pos = 0;        /* anonymous memory has no file offset */
        }

        /*
         * now let kernel internal function uvm_mmap do the work.
         */

        error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
            flags, handle, pos);

        if (error == 0)
                /* remember to add offset back into the returned address */
                *retval = (register_t)(addr + pageoff);

        return (error);
}
411
412 /*
413 * sys___msync13: the msync system call (a front-end for flush)
414 */
415
int
sys___msync13(p, v, retval)
        struct proc *p;
        void *v;
        register_t *retval;
{
        struct sys___msync13_args /* {
                syscallarg(caddr_t) addr;
                syscallarg(size_t) len;
                syscallarg(int) flags;
        } */ *uap = v;
        vaddr_t addr;
        vsize_t size, pageoff;
        vm_map_t map;
        int rv, flags, uvmflags;

        /*
         * extract syscall args from the uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        flags = SCARG(uap, flags);

        /*
         * sanity check flags: reject unknown bits, an empty flag set,
         * and the contradictory MS_ASYNC|MS_SYNC combination
         */
        if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
            (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
            (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
                return (EINVAL);
        /* MS_INVALIDATE alone: default to synchronous behavior */
        if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
                flags |= MS_SYNC;

        /*
         * align the address to a page boundary, and adjust the size accordingly
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t) round_page(size);

        /* disallow wrap-around. */
        if (addr + size < addr)
                return (EINVAL);

        /*
         * get map
         */

        map = &p->p_vmspace->vm_map;

        /*
         * XXXCDC: do we really need this semantic?
         *
         * XXX Gak!  If size is zero we are supposed to sync "all modified
         * pages with the region containing addr".  Unfortunately, we
         * don't really keep track of individual mmaps so we approximate
         * by flushing the range of the map entry containing addr.
         * This can be incorrect if the region splits or is coalesced
         * with a neighbor.
         */
        if (size == 0) {
                vm_map_entry_t entry;

                /* read-lock the map only long enough to copy out the range */
                vm_map_lock_read(map);
                rv = uvm_map_lookup_entry(map, addr, &entry);
                if (rv == TRUE) {
                        addr = entry->start;
                        size = entry->end - entry->start;
                }
                vm_map_unlock_read(map);
                if (rv == FALSE)
                        return (EINVAL);        /* addr not mapped at all */
        }

        /*
         * translate MS_ flags into PGO_ flags
         */
        uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
        if (flags & MS_SYNC)
                uvmflags |= PGO_SYNCIO;
        else
                uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now!
                                         * (MS_ASYNC behaves like MS_SYNC) */

        /*
         * doit!
         */
        rv = uvm_map_clean(map, addr, addr+size, uvmflags);

        /*
         * and return, mapping the KERN_* status to an errno
         */
        switch (rv) {
        case KERN_SUCCESS:
                return(0);
        case KERN_INVALID_ADDRESS:
                return (ENOMEM);
        case KERN_FAILURE:
                return (EIO);
        case KERN_PAGES_LOCKED:  /* XXXCDC: uvm doesn't return this */
                return (EBUSY);
        default:
                return (EINVAL);
        }
        /*NOTREACHED*/
}
522
523 /*
524 * sys_munmap: unmap a users memory
525 */
526
int
sys_munmap(p, v, retval)
        register struct proc *p;
        void *v;
        register_t *retval;
{
        register struct sys_munmap_args /* {
                syscallarg(caddr_t) addr;
                syscallarg(size_t) len;
        } */ *uap = v;
        vaddr_t addr;
        vsize_t size, pageoff;
        vm_map_t map;
        vaddr_t vm_min_address = VM_MIN_ADDRESS;
        struct vm_map_entry *dead_entries;      /* entries removed, to detach */

        /*
         * get syscall args...
         */

        addr = (vaddr_t) SCARG(uap, addr);
        size = (vsize_t) SCARG(uap, len);

        /*
         * align the address to a page boundary, and adjust the size accordingly
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t) round_page(size);

        if ((int)size < 0)
                return (EINVAL);
        if (size == 0)
                return (0);     /* nothing to unmap */

        /*
         * Check for illegal addresses.  Watch out for address wrap...
         * Note that VM_*_ADDRESS are not constants due to casts (argh).
         */
        if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
                return (EINVAL);
        if (vm_min_address > 0 && addr < vm_min_address)
                return (EINVAL);
        if (addr > addr + size)
                return (EINVAL);
        map = &p->p_vmspace->vm_map;


        vm_map_lock(map);       /* lock map so we can checkprot */

        /*
         * interesting system call semantic: make sure entire range is
         * allocated before allowing an unmap.
         */

        if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
                vm_map_unlock(map);
                return (EINVAL);
        }

        /*
         * doit!  the removed entries are collected in dead_entries so
         * their teardown can happen after the map lock is dropped.
         */
        (void) uvm_unmap_remove(map, addr, addr + size, &dead_entries);

        vm_map_unlock(map);     /* and unlock */

        /* detach the dead entries outside the map lock */
        if (dead_entries != NULL)
                uvm_unmap_detach(dead_entries, 0);

        return (0);
}
601
602 /*
603 * sys_mprotect: the mprotect system call
604 */
605
606 int
607 sys_mprotect(p, v, retval)
608 struct proc *p;
609 void *v;
610 register_t *retval;
611 {
612 struct sys_mprotect_args /* {
613 syscallarg(caddr_t) addr;
614 syscallarg(int) len;
615 syscallarg(int) prot;
616 } */ *uap = v;
617 vaddr_t addr;
618 vsize_t size, pageoff;
619 vm_prot_t prot;
620 int rv;
621
622 /*
623 * extract syscall args from uap
624 */
625
626 addr = (vaddr_t)SCARG(uap, addr);
627 size = (vsize_t)SCARG(uap, len);
628 prot = SCARG(uap, prot) & VM_PROT_ALL;
629
630 /*
631 * align the address to a page boundary, and adjust the size accordingly
632 */
633 pageoff = (addr & PAGE_MASK);
634 addr -= pageoff;
635 size += pageoff;
636 size = (vsize_t) round_page(size);
637 if ((int)size < 0)
638 return (EINVAL);
639
640 /*
641 * doit
642 */
643
644 rv = uvm_map_protect(&p->p_vmspace->vm_map,
645 addr, addr+size, prot, FALSE);
646
647 if (rv == KERN_SUCCESS)
648 return (0);
649 if (rv == KERN_PROTECTION_FAILURE)
650 return (EACCES);
651 return (EINVAL);
652 }
653
654 /*
655 * sys_minherit: the minherit system call
656 */
657
658 int
659 sys_minherit(p, v, retval)
660 struct proc *p;
661 void *v;
662 register_t *retval;
663 {
664 struct sys_minherit_args /* {
665 syscallarg(caddr_t) addr;
666 syscallarg(int) len;
667 syscallarg(int) inherit;
668 } */ *uap = v;
669 vaddr_t addr;
670 vsize_t size, pageoff;
671 register vm_inherit_t inherit;
672
673 addr = (vaddr_t)SCARG(uap, addr);
674 size = (vsize_t)SCARG(uap, len);
675 inherit = SCARG(uap, inherit);
676 /*
677 * align the address to a page boundary, and adjust the size accordingly
678 */
679
680 pageoff = (addr & PAGE_MASK);
681 addr -= pageoff;
682 size += pageoff;
683 size = (vsize_t) round_page(size);
684
685 if ((int)size < 0)
686 return (EINVAL);
687
688 switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
689 inherit)) {
690 case KERN_SUCCESS:
691 return (0);
692 case KERN_PROTECTION_FAILURE:
693 return (EACCES);
694 }
695 return (EINVAL);
696 }
697
698 /*
699 * sys_mlock: memory lock
700 */
701
702 int
703 sys_mlock(p, v, retval)
704 struct proc *p;
705 void *v;
706 register_t *retval;
707 {
708 struct sys_mlock_args /* {
709 syscallarg(const void *) addr;
710 syscallarg(size_t) len;
711 } */ *uap = v;
712 vaddr_t addr;
713 vsize_t size, pageoff;
714 int error;
715
716 /*
717 * extract syscall args from uap
718 */
719 addr = (vaddr_t)SCARG(uap, addr);
720 size = (vsize_t)SCARG(uap, len);
721
722 /*
723 * align the address to a page boundary and adjust the size accordingly
724 */
725 pageoff = (addr & PAGE_MASK);
726 addr -= pageoff;
727 size += pageoff;
728 size = (vsize_t) round_page(size);
729
730 /* disallow wrap-around. */
731 if (addr + (int)size < addr)
732 return (EINVAL);
733
734 if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
735 return (EAGAIN);
736
737 #ifdef pmap_wired_count
738 if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
739 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
740 return (EAGAIN);
741 #else
742 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
743 return (error);
744 #endif
745
746 error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
747 return (error == KERN_SUCCESS ? 0 : ENOMEM);
748 }
749
750 /*
751 * sys_munlock: unlock wired pages
752 */
753
754 int
755 sys_munlock(p, v, retval)
756 struct proc *p;
757 void *v;
758 register_t *retval;
759 {
760 struct sys_munlock_args /* {
761 syscallarg(const void *) addr;
762 syscallarg(size_t) len;
763 } */ *uap = v;
764 vaddr_t addr;
765 vsize_t size, pageoff;
766 int error;
767
768 /*
769 * extract syscall args from uap
770 */
771
772 addr = (vaddr_t)SCARG(uap, addr);
773 size = (vsize_t)SCARG(uap, len);
774
775 /*
776 * align the address to a page boundary, and adjust the size accordingly
777 */
778 pageoff = (addr & PAGE_MASK);
779 addr -= pageoff;
780 size += pageoff;
781 size = (vsize_t) round_page(size);
782
783 /* disallow wrap-around. */
784 if (addr + (int)size < addr)
785 return (EINVAL);
786
787 #ifndef pmap_wired_count
788 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
789 return (error);
790 #endif
791
792 error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
793 return (error == KERN_SUCCESS ? 0 : ENOMEM);
794 }
795
796 /*
797 * uvm_mmap: internal version of mmap
798 *
799 * - used by sys_mmap, exec, and sysv shm
800 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
801 * sysv shm uses "named anonymous memory")
802 * - caller must page-align the file offset
803 */
804
int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
        vm_map_t map;
        vaddr_t *addr;          /* in/out: suggested/chosen start address */
        vsize_t size;
        vm_prot_t prot, maxprot;
        int flags;
        caddr_t handle;         /* XXX: VNODE? */
        vaddr_t foff;           /* file offset; must be page aligned */
{
        struct uvm_object *uobj;        /* backing object, NULL for anon */
        struct vnode *vp;
        int retval;
        int advice = UVM_ADV_NORMAL;
        uvm_flag_t uvmflag = 0;

        /*
         * check params: zero size is a successful no-op; the offset must
         * be page aligned and the requested protection within maxprot
         */

        if (size == 0)
                return(0);
        if (foff & PAGE_MASK)
                return(EINVAL);
        if ((prot & maxprot) != prot)
                return(EINVAL);

        /*
         * for non-fixed mappings, round off the suggested address.
         * for fixed mappings, check alignment and zap old mappings.
         */

        if ((flags & MAP_FIXED) == 0) {
                *addr = round_page(*addr);      /* round */
        } else {

                if (*addr & PAGE_MASK)
                        return(EINVAL);
                uvmflag |= UVM_FLAG_FIXED;
                (void) uvm_unmap(map, *addr, *addr + size);     /* zap! */
        }

        /*
         * handle anon vs. non-anon mappings.   for non-anon mappings attach
         * to underlying vm object.
         */

        if (flags & MAP_ANON) {

                foff = UVM_UNKNOWN_OFFSET;
                uobj = NULL;
                if ((flags & MAP_SHARED) == 0)
                        /* XXX: defer amap create */
                        uvmflag |= UVM_FLAG_COPYONW;
                else
                        /* shared: create amap now */
                        uvmflag |= UVM_FLAG_OVERLAY;

        } else {

                vp = (struct vnode *) handle;   /* get vnode */
                if (vp->v_type != VCHR) {
                        /*
                         * regular/block vnode: attach a uvn.  write access
                         * is masked out of the attach protection unless the
                         * mapping is shared.
                         */
                        uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
                           maxprot : (maxprot & ~VM_PROT_WRITE));

                        /*
                         * XXXCDC: hack from old code
                         * don't allow vnodes which have been mapped
                         * shared-writeable to persist [forces them to be
                         * flushed out when last reference goes].
                         * XXXCDC: interesting side effect: avoids a bug.
                         * note that in WRITE [ufs_readwrite.c] that we
                         * allocate buffer, uncache, and then do the write.
                         * the problem with this is that if the uncache causes
                         * VM data to be flushed to the same area of the file
                         * we are writing to... in that case we've got the
                         * buffer locked and our process goes to sleep forever.
                         *
                         * XXXCDC: checking maxprot protects us from the
                         * "persistbug" program but this is not a long term
                         * solution.
                         *
                         * XXXCDC: we don't bother calling uncache with the vp
                         * VOP_LOCKed since we know that we are already
                         * holding a valid reference to the uvn (from the
                         * uvn_attach above), and thus it is impossible for
                         * the uncache to kill the uvn and trigger I/O.
                         */
                        if (flags & MAP_SHARED) {
                                if ((prot & VM_PROT_WRITE) ||
                                    (maxprot & VM_PROT_WRITE)) {
                                        uvm_vnp_uncache(vp);
                                }
                        }

                } else {
                        /* character device: attach a udv object instead */
                        uobj = udv_attach((void *) &vp->v_rdev,
                            (flags & MAP_SHARED) ?
                            maxprot : (maxprot & ~VM_PROT_WRITE), foff, size);
                        advice = UVM_ADV_RANDOM;
                }

                if (uobj == NULL)
                        return((vp->v_type == VREG) ? ENOMEM : EINVAL);

                if ((flags & MAP_SHARED) == 0)
                        uvmflag |= UVM_FLAG_COPYONW;
        }

        /*
         * set up mapping flags: fold prot/maxprot/inheritance/advice into
         * the uvmflag bits accumulated above
         */

        uvmflag = UVM_MAPFLAG(prot, maxprot,
            (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
            advice, uvmflag);

        /*
         * do it!
         */

        retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

        if (retval == KERN_SUCCESS)
                return(0);

        /*
         * errors: first detach from the uobj, if any, to drop the
         * reference taken by uvn_attach/udv_attach above.
         */

        if (uobj)
                uobj->pgops->pgo_detach(uobj);

        switch (retval) {
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
                return(ENOMEM);
        case KERN_PROTECTION_FAILURE:
                return(EACCES);
        }
        return(EINVAL);
}
947