/* $Id: uvm_mmap.c,v 1.1 1998/02/05 06:25:09 mrg Exp $ */

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *      Washington University, University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>


/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif
	return (EOPNOTSUPP);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif
	return (EOPNOTSUPP);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
#endif
	return (EOPNOTSUPP);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
#endif
	return (EOPNOTSUPP);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?   could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */
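/*
 * Illustrative example (assuming a 4 KB PAGE_SIZE): a call with
 * pos = 0x1234 gives pageoff = 0x234, so the object is mapped starting
 * at file offset 0x1000 and the value returned to the caller is the
 * mapped base address plus 0x234, i.e. it points at file offset 0x1234.
 * With MAP_FIXED the supplied address must share that same 0x234
 * remainder or the call fails with EINVAL.
 */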

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vm_offset_t addr;
	off_t pos;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the newsize fits within a vm_offset_t
	 * XXX: need to revise addressing data types
	 */
	if (pos + size > (vm_offset_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%x too big\n", pos, (int)size);
#endif
		return(EINVAL);
	}

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	size += pageoff;			/* add offset */
	size = (vm_size_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return(EINVAL);			/* don't allow wrap */

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return(EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return(EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return(EINVAL);
		if (addr > addr + size)
			return (EINVAL);	/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible
		 * heap.  we will refine our guess later (e.g. to account
		 * for VAC, etc.)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return(EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return(EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return(EINVAL);		/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);	/* only REG/CHR support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}

#if defined(COMPAT_13)
		/*
		 * XXX: support MAP_FILE: some older applications call mmap
		 * with flags set to MAP_FILE (i.e. zero).  the proper
		 * semantics for this seem to be MAP_SHARED for devices and
		 * MAP_PRIVATE for files.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 * XXX: what about MAP_COPY?
		 */

		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: corrected bogus mmap (pid %d comm %s)\n",
			    p->p_pid, p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}
#else

		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0)
			return(EINVAL);		/* sorry, old timer */

#endif

#if defined(sparc)
		/*
		 * sparc seems to want to map devices MAP_PRIVATE, which
		 * doesn't make sense for us (why would we want to
		 * copy-on-write fault framebuffer mappings?).  fix this.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converting MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid, p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED; /* switch it */
		}
#endif

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return(EACCES);

		/* check write case (if shared) */
		if (flags & MAP_SHARED) {
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			else if (prot & PROT_WRITE)
				return(EACCES);
		} else {
			/* MAP_PRIVATE mappings can always be written to:
			   writes go to a private copy of the data */
			maxprot |= VM_PROT_WRITE;
		}
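
		/*
		 * Example of the effect of maxprot: a file opened read-only
		 * and mapped MAP_SHARED ends up with maxprot = READ|EXECUTE,
		 * so asking for PROT_WRITE above fails with EACCES, and a
		 * later mprotect() adding write access is refused as well
		 * since it would exceed maxprot.  A MAP_PRIVATE mapping of
		 * the same file keeps VM_PROT_WRITE in maxprot because
		 * writes only modify the private copy.
		 */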

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else /* MAP_ANON case */ {

		if (fd != -1)
			return(EINVAL);

 is_anon:	/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		*retval = (register_t)(addr + pageoff); /* remember to add offset */

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;
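	/*
	 * For example: flags of zero, an unknown bit, or MS_ASYNC|MS_SYNC
	 * together are all rejected with EINVAL above, while MS_INVALIDATE
	 * by itself is accepted and is given MS_SYNC behaviour by default.
	 */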

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	/* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return(0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	if ((int)size < 0)
		return(EINVAL);
	if (size == 0)
		return(0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;

	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
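	/*
	 * uvm_map_checkprot() is called with VM_PROT_NONE, which every
	 * mapping satisfies, so the check below succeeds only if the whole
	 * range [addr, addr+size) is covered by map entries; any hole in
	 * the range makes the unmap fail with EINVAL.
	 */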

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return(EINVAL);
	}

	/*
	 * doit!
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, 0, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return(0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;
	int rv;

	/*
	 * extract syscall args from uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if ((int)size < 0)
		return(EINVAL);

	/*
	 * doit
	 */

	rv = uvm_map_protect(&p->p_vmspace->vm_map,
	    addr, addr+size, prot, FALSE);

	if (rv == KERN_SUCCESS)
		return(0);
	if (rv == KERN_PROTECTION_FAILURE)
		return(EACCES);
	return(EINVAL);
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_minherit_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly.
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if ((int)size < 0)
		return(EINVAL);

	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size
	 * accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

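	/*
	 * Two separate limits are checked below: uvmexp.wiredmax caps the
	 * total number of wired pages system-wide (atop() converts the byte
	 * count to pages), and, where pmap_wired_count() is available, the
	 * process's RLIMIT_MEMLOCK caps how many bytes it may wire;
	 * otherwise locking memory is restricted to the super-user.
	 */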
	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *   sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
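/*
 * For example, sys_mmap() above ends up calling
 *	uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
 *	    flags, (caddr_t)vp, pos);
 * for a file mapping, or passes a NULL handle for anonymous memory.
 */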

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vm_offset_t *addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vm_offset_t foff;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */
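	/*
	 * Note the MAP_FIXED semantics implemented below: any existing
	 * mappings in [*addr, *addr + size) are removed first by
	 * uvm_unmap(), so a fixed mapping silently replaces whatever was
	 * already there.
	 */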

	if ((flags & MAP_FIXED) == 0) {

		*addr = round_page(*addr);	/* round */

	} else {

		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size, 0);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW; /* XXX: defer amap create */
		else
			uvmflag |= UVM_FLAG_OVERLAY; /* shared: create amap now */

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp,
			    (flags & MAP_SHARED) ? maxprot :
			    (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 *
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] that we
			 * allocate buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache
			 * causes VM data to be flushed to the same area of
			 * the file we are writing to... in that case we've
			 * got the buffer locked and our process goes to
			 * sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the
			 * vp VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ? maxprot :
			    (maxprot & ~VM_PROT_WRITE));
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VCHR) ? EINVAL : ENOMEM);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

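	/*
	 * UVM_MAPFLAG() packs the protection, maximum protection,
	 * inheritance mode, advice, and the flag bits accumulated above
	 * into the single uvm_flag_t argument that uvm_map() expects.
	 */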
	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return(0);

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return(ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return(EACCES);
	}
	return(EINVAL);
}