/*	$NetBSD: uvm_mmap.c,v 1.17 1999/03/09 12:18:23 kleink Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *	>>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *      Washington University, University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>


/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?  could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */
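
/*
 * Illustrative userland sketch (not part of the original file; the fd
 * and a PAGE_SIZE of 0x1000 are assumed for the example): a non-fixed
 * request with an unaligned offset, e.g.
 *
 *	char *p = mmap(NULL, 0x100, PROT_READ, MAP_PRIVATE, fd, 0x12345);
 *
 * maps the file from trunc_page(0x12345) = 0x12000 and returns a
 * pointer adjusted up by the page offset (0x345), so that *p is the
 * byte at file offset 0x12345.
 */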

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the new size fits within a vaddr_t
	 * XXX: need to revise addressing data types
	 */
	if (pos + size > (vaddr_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%lx too big\n", (long long)pos,
		    (long)size);
#endif
		return (EINVAL);
	}

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos  -= pageoff;
	size += pageoff;			/* add offset */
	size = (vsize_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);		/* don't allow wrap */
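
	/*
	 * Worked example of the adjustment above (hypothetical numbers,
	 * assuming PAGE_SIZE = 0x1000): pos = 0x12345, size = 0x100 gives
	 * pageoff = 0x345, pos = 0x12000 and size = round_page(0x445) =
	 * 0x1000, i.e. the mapping is widened to cover every page the
	 * original [pos, pos + size) range touched.
	 */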

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);	/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return (EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return (EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);	/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK)
			return (ENODEV);  /* only REG/CHR/BLK support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable or append-only.
			 * otherwise, if PROT_WRITE was requested, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			} else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/* MAP_PRIVATE mappings can always be written to */
			maxprot |= VM_PROT_WRITE;
		}
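
		/*
		 * Consequence of the maxprot computed above (illustrative,
		 * not in the original file): a file opened read-only and
		 * mapped MAP_SHARED ends up with a maxprot of
		 * VM_PROT_READ|VM_PROT_EXECUTE, so a later
		 * mprotect(addr, len, PROT_WRITE) on the region fails with
		 * EACCES instead of granting write access to the file.
		 */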

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */

		if (fd != -1)
			return (EINVAL);

 is_anon:	/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;
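
	/*
	 * Examples of the checks above (illustrative): a flags value of 0
	 * and MS_ASYNC|MS_SYNC both fail with EINVAL, while MS_INVALIDATE
	 * alone is accepted and, by the defaulting above, behaves as
	 * MS_INVALIDATE|MS_SYNC.
	 */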

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	/* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;

	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
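
	/*
	 * Note (illustrative): this check makes munmap() of a range that
	 * is even partly unallocated fail with EINVAL, where some other
	 * implementations silently skip holes.
	 */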

	/*
	 * doit!
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int rv;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);
	if ((int)size < 0)
		return (EINVAL);

	/*
	 * doit
	 */

	rv = uvm_map_protect(&p->p_vmspace->vm_map,
	    addr, addr+size, prot, FALSE);

	if (rv == KERN_SUCCESS)
		return (0);
	if (rv == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_minherit_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);

	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
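
/*
 * Usage sketch (illustrative; MAP_INHERIT_NONE is assumed to be the
 * usual <sys/mman.h> inheritance constant): minherit() controls what
 * a child receives across fork(), e.g.
 *
 *	minherit(addr, len, MAP_INHERIT_NONE);
 *
 * leaves that range unmapped in the child while the parent keeps it.
 */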

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif
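
	/*
	 * Note (illustrative): where the pmap tracks wired pages, the
	 * check above enforces the per-process RLIMIT_MEMLOCK limit;
	 * otherwise mlock() falls back to requiring superuser privilege.
	 */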

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
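
/*
 * Illustrative call (mirroring sys_mmap above, not in the original
 * file): an anonymous copy-on-write mapping could be established with
 *
 *	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size,
 *	    VM_PROT_ALL, VM_PROT_ALL, MAP_ANON|MAP_PRIVATE, NULL, 0);
 *
 * i.e. a NULL handle selects the anonymous-memory path below.
 */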

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vaddr_t *addr;
	vsize_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vaddr_t foff;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return (0);
	if (foff & PAGE_MASK)
		return (EINVAL);
	if ((prot & maxprot) != prot)
		return (EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {

		if (*addr & PAGE_MASK)
			return (EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] we
			 * allocate a buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return ((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return (0);

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}