/*	$NetBSD: uvm_mmap.c,v 1.11 1998/07/07 23:22:13 thorpej Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *      Washington University, University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>


/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?  could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vm_offset_t addr;
	struct vattr va;
	off_t pos;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the new size fits within a vm_offset_t
	 * XXX: need to revise addressing data types
	 */
	if (pos + size > (vm_offset_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%x too big\n", pos, (int)size);
#endif
		return (EINVAL);
	}

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	size += pageoff;			/* add offset */
	size = (vm_size_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);		/* don't allow wrap */
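
	/*
	 * e.g. assuming PAGE_SIZE is 4096 (PAGE_MASK 0xfff), a call with
	 * pos 0x10345 and len 0x100 gives:
	 *
	 *	pageoff = 0x345
	 *	pos     = 0x10000			(page aligned)
	 *	size    = 0x100 + 0x345 = 0x445 -> round_page -> 0x1000
	 *
	 * so the mapping covers the whole page containing the requested
	 * bytes, and the final return value is addr + pageoff (see below).
	 */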

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);	/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return (EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return (EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);	/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK)
			return (ENODEV); /* only REG/CHR/BLK support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}
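		/*
		 * (so a /dev/zero mapping ends up below as zero-fill
		 * anonymous memory, exactly as if the caller had passed
		 * MAP_ANON in the first place.)
		 */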

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable or append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			} else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/*
			 * MAP_PRIVATE mappings can always be written to:
			 * writes go to private copy-on-write pages, never
			 * back to the file.
			 */
			maxprot |= VM_PROT_WRITE;
		}
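
		/*
		 * in summary (derived from the checks above, for the
		 * common open modes):
		 *
		 *	open mode	MAP_SHARED	MAP_PRIVATE
		 *	------------	-----------	-----------
		 *	read-only	r-x		rwx
		 *	read-write	rwx (*)		rwx
		 *
		 * (*) write is withheld if the file is immutable or
		 *     append-only; asking for PROT_WRITE then fails
		 *     with EPERM.
		 */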

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */

		if (fd != -1)
			return (EINVAL);

 is_anon:	/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;
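
	/*
	 * i.e. the caller may pass MS_SYNC, MS_ASYNC or MS_INVALIDATE,
	 * optionally combining MS_INVALIDATE with one of the first two;
	 * zero flags, unknown bits, and MS_SYNC|MS_ASYNC are rejected,
	 * and a bare MS_INVALIDATE has just been promoted to
	 * MS_SYNC|MS_INVALIDATE.
	 */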

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	/* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;

	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
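
	/*
	 * (e.g. munmap()ing a range that has already been partially or
	 * fully unmapped fails the check above with EINVAL rather than
	 * silently succeeding.)
	 */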

	/*
	 * doit!
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, 0, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;
	int rv;

	/*
	 * extract syscall args from uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if ((int)size < 0)
		return (EINVAL);

	/*
	 * doit
	 */

	rv = uvm_map_protect(&p->p_vmspace->vm_map,
	    addr, addr+size, prot, FALSE);

	if (rv == KERN_SUCCESS)
		return (0);
	if (rv == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_minherit_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);

	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif
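
	/*
	 * note: atop() converts a byte count to a page count and ptoa()
	 * does the reverse, so the global wired-page check above is done
	 * in pages while the per-process RLIMIT_MEMLOCK check is done
	 * in bytes.
	 */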

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
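
/*
 * e.g. the call from sys_mmap above is:
 *
 *	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
 *	    flags, handle, pos);
 *
 * where "handle" is the vnode pointer (or NULL for anonymous memory)
 * and "pos" has already been truncated to a page boundary.
 */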

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vm_offset_t *addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vm_offset_t foff;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return (0);
	if (foff & PAGE_MASK)
		return (EINVAL);
	if ((prot & maxprot) != prot)
		return (EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {

		if (*addr & PAGE_MASK)
			return (EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size, 0);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] that we
			 * allocate buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return ((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);
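
	/*
	 * (UVM_MAPFLAG() packs the protections, the inheritance code, the
	 * access advice and the UVM_FLAG_* bits accumulated above into the
	 * single uvm_flag_t that uvm_map() expects.)
	 */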

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return (0);

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}