/*	$Id: uvm_mmap.c,v 1.1 1998/02/05 06:25:09 mrg Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *	Washington University, University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>


/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int sys_sbrk(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
#if 0
  struct sys_sbrk_args /* {
			  syscallarg(int) incr;
			  } */ *uap = v;
#endif
  return (EOPNOTSUPP);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int sys_sstk(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
#if 0
  struct sys_sstk_args /* {
			  syscallarg(int) incr;
			  } */ *uap = v;
#endif
  return (EOPNOTSUPP);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int sys_madvise(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
#if 0
  struct sys_madvise_args /* {
			     syscallarg(caddr_t) addr;
			     syscallarg(size_t) len;
			     syscallarg(int) behav;
			     } */ *uap = v;
#endif
  return (EOPNOTSUPP);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int sys_mincore(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
#if 0
  struct sys_mincore_args /* {
			     syscallarg(caddr_t) addr;
			     syscallarg(size_t) len;
			     syscallarg(char *) vec;
			     } */ *uap = v;
#endif
  return (EOPNOTSUPP);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?   could it be useful?
 */

void munmapfd(p, fd)

struct proc *p;
int fd;

{
  /*
   * XXX should vm_deallocate any regions mapped to this file
   */
  p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */
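
/*
 * a worked example of the above (assuming PAGE_SIZE == 0x1000 purely
 * for illustration): an mmap with pos == 0x12345 gets pageoff == 0x345,
 * the kernel maps starting from file offset 0x12000, and the value
 * returned to the caller is the page-aligned mapping address plus
 * 0x345, so the caller's pointer lands on byte 0x12345 of the file.
 */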

int sys_mmap(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
  register struct sys_mmap_args /* {
				   syscallarg(caddr_t) addr;
				   syscallarg(size_t) len;
				   syscallarg(int) prot;
				   syscallarg(int) flags;
				   syscallarg(int) fd;
				   syscallarg(long) pad;
				   syscallarg(off_t) pos;
				   } */ *uap = v;
  vm_offset_t addr;
  off_t pos;
  vm_size_t size, pageoff;
  vm_prot_t prot, maxprot;
  int flags, fd;
  vm_offset_t vm_min_address = VM_MIN_ADDRESS;
  register struct filedesc *fdp = p->p_fd;
  register struct file *fp;
  struct vnode *vp;
  caddr_t handle;
  int error;

  /*
   * first, extract syscall args from the uap.
   */

  addr = (vm_offset_t) SCARG(uap, addr);
  size = (vm_size_t) SCARG(uap, len);
  prot = SCARG(uap, prot) & VM_PROT_ALL;
  flags = SCARG(uap, flags);
  fd = SCARG(uap, fd);
  pos = SCARG(uap, pos);

  /*
   * make sure that the new size fits within a vm_offset_t
   * XXX: need to revise addressing data types
   */
  if (pos + size > (vm_offset_t)-PAGE_SIZE) {
#ifdef DEBUG
    printf("mmap: pos=%qx, size=%x too big\n", pos, (int)size);
#endif
    return(EINVAL);
  }
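
  /*
   * note on the check above: (vm_offset_t)-PAGE_SIZE is the largest
   * page-aligned value a vm_offset_t can hold (e.g. 0xfffff000 with
   * 32-bit addresses and 0x1000 byte pages), so rejecting anything
   * with pos + size beyond it guards against the mapping's end
   * wrapping around the top of the address type after rounding.
   */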

  /*
   * align file position and save offset.  adjust size.
   */

  pageoff = (pos & PAGE_MASK);
  pos  -= pageoff;
  size += pageoff;			/* add offset */
  size = (vm_size_t) round_page(size);	/* round up */
  if ((ssize_t) size < 0)
    return(EINVAL);			/* don't allow wrap */

  /*
   * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
   */

  if (flags & MAP_FIXED) {

    /* ensure address and file offset are aligned properly */
    addr -= pageoff;
    if (addr & PAGE_MASK)
      return(EINVAL);

    if (VM_MAXUSER_ADDRESS > 0 && (addr + size) > VM_MAXUSER_ADDRESS)
      return(EINVAL);
    if (vm_min_address > 0 && addr < vm_min_address)
      return(EINVAL);
    if (addr > addr + size)
      return (EINVAL);		/* no wrapping! */

  } else {

    /*
     * not fixed: make sure we skip over the largest possible heap.
     * we will refine our guess later (e.g. to account for VAC, etc.)
     */
    if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
      addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
  }

  /*
   * check for file mappings (i.e. not anonymous) and verify file.
   */

  if ((flags & MAP_ANON) == 0) {

    if (fd < 0 || fd >= fdp->fd_nfiles)
      return(EBADF);		/* failed range check? */
    fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
    if (fp == NULL)
      return(EBADF);

    if (fp->f_type != DTYPE_VNODE)
      return(EINVAL);		/* only mmap vnodes! */
    vp = (struct vnode *)fp->f_data;	/* convert to vnode */
    if (vp->v_type != VREG && vp->v_type != VCHR)
      return (EINVAL);		/* only REG/CHR support mmap */

    /* special case: catch SunOS style /dev/zero */
    if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
      flags |= MAP_ANON;
      goto is_anon;
    }

#if defined(COMPAT_13)
    /*
     * XXX: support MAP_FILE: some older applications call mmap with flags
     * set to MAP_FILE (i.e. zero).    the proper semantics for this seem
     * to be MAP_SHARED for devices and MAP_PRIVATE for files.
     *
     * XXX: how does MAP_ANON fit in the picture?
     * XXX: what about MAP_COPY?
     */

    if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DIAGNOSTIC)
      printf("WARNING: corrected bogus mmap (pid %d comm %s)\n", p->p_pid,
	  p->p_comm);
#endif
      if (vp->v_type == VCHR)
        flags |= MAP_SHARED;		/* for a device */
      else
        flags |= MAP_PRIVATE;		/* for a file */
    }
#else

    if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0)
      return(EINVAL);   /* sorry, old timer */

#endif

#if defined(sparc)
    /*
     * sparc seems to want to map devices MAP_PRIVATE, which doesn't
     * make sense for us (why would we want to copy-on-write fault
     * framebuffer mappings?).    fix this.
     */
    if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
      printf("WARNING: converting MAP_PRIVATE device mapping to MAP_SHARED "
	  "(pid %d comm %s)\n", p->p_pid, p->p_comm);
#endif
      flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;   /* switch it */
    }
#endif

    /*
     * now check protection
     */

    maxprot = VM_PROT_EXECUTE;

    /* check read access */
    if (fp->f_flag & FREAD)
      maxprot |= VM_PROT_READ;
    else if (prot & PROT_READ)
      return(EACCES);

    /* check write case (if shared) */
    if (flags & MAP_SHARED) {
      if (fp->f_flag & FWRITE)
	maxprot |= VM_PROT_WRITE;
      else if (prot & PROT_WRITE)
	return(EACCES);
    } else {
      maxprot |= VM_PROT_WRITE;	/* MAP_PRIVATE mappings can always be written to */
    }
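
    /*
     * example of the rule above: a file opened read-only may still be
     * mapped PROT_READ|PROT_WRITE if MAP_PRIVATE, since private writes
     * go to the process's own copy-on-write pages and never reach the
     * file; the same request with MAP_SHARED fails with EACCES because
     * shared writes would modify the underlying object.
     */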

    /*
     * set handle to vnode
     */

    handle = (caddr_t)vp;

  } else /* MAP_ANON case */ {

    if (fd != -1)
      return(EINVAL);

is_anon:		/* label for SunOS style /dev/zero */
    handle = NULL;
    maxprot = VM_PROT_ALL;
    pos = 0;
  }

  /*
   * now let kernel internal function uvm_mmap do the work.
   */

  error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
		  flags, handle, pos);

  if (error == 0)
    *retval = (register_t)(addr + pageoff); /* remember to add offset */

  return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int sys___msync13(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
  struct sys___msync13_args /* {
			   syscallarg(caddr_t) addr;
			   syscallarg(size_t) len;
			   syscallarg(int) flags;
			   } */ *uap = v;
  vm_offset_t addr;
  vm_size_t size, pageoff;
  vm_map_t map;
  int rv, flags, uvmflags;

  /*
   * extract syscall args from the uap
   */

  addr = (vm_offset_t)SCARG(uap, addr);
  size = (vm_size_t)SCARG(uap, len);
  flags = SCARG(uap, flags);

  /* sanity check flags */
  if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
      (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
      (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
	  return (EINVAL);
  if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
	  flags |= MS_SYNC;
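
  /*
   * net effect of the checks above: exactly MS_ASYNC, MS_SYNC,
   * MS_INVALIDATE, MS_ASYNC|MS_INVALIDATE and MS_SYNC|MS_INVALIDATE
   * are accepted.  MS_ASYNC|MS_SYNC is contradictory and rejected,
   * and a bare MS_INVALIDATE has MS_SYNC added as the default mode.
   */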

  /*
   * align the address to a page boundary, and adjust the size accordingly.
   */

  pageoff = (addr & PAGE_MASK);
  addr -= pageoff;
  size += pageoff;
  size = (vm_size_t) round_page(size);

  /* disallow wrap-around. */
  if (addr + size < addr)
    return (EINVAL);

  /*
   * get map
   */

  map = &p->p_vmspace->vm_map;

  /*
   * XXXCDC: do we really need this semantic?
   *
   * XXX Gak!  If size is zero we are supposed to sync "all modified
   * pages with the region containing addr".  Unfortunately, we
   * don't really keep track of individual mmaps so we approximate
   * by flushing the range of the map entry containing addr.
   * This can be incorrect if the region splits or is coalesced
   * with a neighbor.
   */
  if (size == 0) {
    vm_map_entry_t entry;

    vm_map_lock_read(map);
    rv = uvm_map_lookup_entry(map, addr, &entry);
    if (rv == TRUE) {
      addr = entry->start;
      size = entry->end - entry->start;
    }
    vm_map_unlock_read(map);
    if (rv == FALSE)
      return (EINVAL);
  }
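
  /*
   * illustration of the approximation (entry boundaries are an
   * implementation detail, so this is best-effort): msync(addr, 0, ...)
   * with addr inside a 64KB mapping flushes the whole map entry
   * containing addr; but if that mapping was later split into two
   * entries (say, by an mprotect of its first half), only the entry
   * containing addr gets flushed.
   */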

  /*
   * translate MS_ flags into PGO_ flags
   */
  uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
  if (flags & MS_SYNC)
    uvmflags |= PGO_SYNCIO;
  else
    uvmflags |= PGO_SYNCIO;		/* XXXCDC: force sync for now! */

  /*
   * doit!
   */
  rv = uvm_map_clean(map, addr, addr+size, uvmflags);

  /*
   * and return...
   */
  switch (rv) {
  case KERN_SUCCESS:
    return(0);
  case KERN_INVALID_ADDRESS:
    return (ENOMEM);
  case KERN_FAILURE:
    return (EIO);
  case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
    return (EBUSY);
  default:
    return (EINVAL);
  }
  /*NOTREACHED*/
}

/*
 * sys_munmap: unmap a user's memory
 */

int sys_munmap(p, v, retval)

register struct proc *p;
void *v;
register_t *retval;

{
  register struct sys_munmap_args /* {
				     syscallarg(caddr_t) addr;
				     syscallarg(size_t) len;
				     } */ *uap = v;
  vm_offset_t addr;
  vm_size_t size, pageoff;
  vm_map_t map;
  vm_offset_t vm_min_address = VM_MIN_ADDRESS;
  struct vm_map_entry *dead_entries;

  /*
   * get syscall args...
   */

  addr = (vm_offset_t) SCARG(uap, addr);
  size = (vm_size_t) SCARG(uap, len);

  /*
   * align the address to a page boundary, and adjust the size accordingly.
   */

  pageoff = (addr & PAGE_MASK);
  addr -= pageoff;
  size += pageoff;
  size = (vm_size_t) round_page(size);

  if ((int)size < 0)
    return(EINVAL);
  if (size == 0)
    return(0);

  /*
   * Check for illegal addresses.  Watch out for address wrap...
   * Note that VM_*_ADDRESS are not constants due to casts (argh).
   */
  if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
    return (EINVAL);
  if (vm_min_address > 0 && addr < vm_min_address)
    return (EINVAL);
  if (addr > addr + size)
    return (EINVAL);
  map = &p->p_vmspace->vm_map;


  vm_map_lock(map);	/* lock map so we can checkprot */

  /*
   * interesting system call semantic: make sure entire range is
   * allocated before allowing an unmap.
   */
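
  /*
   * the check below passes VM_PROT_NONE to uvm_map_checkprot: an empty
   * protection request is satisfied by any mapping, so the call
   * succeeds exactly when the whole [addr, addr+size) range is covered
   * by existing map entries.  a range containing a hole fails the
   * check and the munmap is rejected.
   */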

  if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
    vm_map_unlock(map);
    return(EINVAL);
  }

  /*
   * doit!
   */
  (void) uvm_unmap_remove(map, addr, addr + size, 0, &dead_entries);

  vm_map_unlock(map);	/* and unlock */

  if (dead_entries != NULL)
    uvm_unmap_detach(dead_entries, 0);

  return(0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int sys_mprotect(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
  struct sys_mprotect_args /* {
			      syscallarg(caddr_t) addr;
			      syscallarg(int) len;
			      syscallarg(int) prot;
			      } */ *uap = v;
  vm_offset_t addr;
  vm_size_t size, pageoff;
  vm_prot_t prot;
  int rv;

  /*
   * extract syscall args from uap
   */

  addr = (vm_offset_t)SCARG(uap, addr);
  size = (vm_size_t)SCARG(uap, len);
  prot = SCARG(uap, prot) & VM_PROT_ALL;

  /*
   * align the address to a page boundary, and adjust the size accordingly
   */
  pageoff = (addr & PAGE_MASK);
  addr -= pageoff;
  size += pageoff;
  size = (vm_size_t) round_page(size);
  if ((int)size < 0)
    return(EINVAL);

  /*
   * doit
   */

  rv = uvm_map_protect(&p->p_vmspace->vm_map,
			   addr, addr+size, prot, FALSE);

  if (rv == KERN_SUCCESS)
    return(0);
  if (rv == KERN_PROTECTION_FAILURE)
    return(EACCES);
  return(EINVAL);
}

/*
 * sys_minherit: the minherit system call
 */

int sys_minherit(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
  struct sys_minherit_args /* {
			      syscallarg(caddr_t) addr;
			      syscallarg(int) len;
			      syscallarg(int) inherit;
			      } */ *uap = v;
  vm_offset_t addr;
  vm_size_t size, pageoff;
  register vm_inherit_t inherit;

  addr = (vm_offset_t)SCARG(uap, addr);
  size = (vm_size_t)SCARG(uap, len);
  inherit = SCARG(uap, inherit);
  /*
   * align the address to a page boundary, and adjust the size accordingly.
   */
  pageoff = (addr & PAGE_MASK);
  addr -= pageoff;
  size += pageoff;
  size = (vm_size_t) round_page(size);
  if ((int)size < 0)
    return(EINVAL);

  switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
			 inherit)) {
  case KERN_SUCCESS:
    return (0);
  case KERN_PROTECTION_FAILURE:
    return (EACCES);
  }
  return (EINVAL);
}

/*
 * sys_mlock: memory lock
 */

int sys_mlock(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
  struct sys_mlock_args /* {
			   syscallarg(caddr_t) addr;
			   syscallarg(size_t) len;
			   } */ *uap = v;
  vm_offset_t addr;
  vm_size_t size, pageoff;
  int error;

  /*
   * extract syscall args from uap
   */
  addr = (vm_offset_t)SCARG(uap, addr);
  size = (vm_size_t)SCARG(uap, len);

  /*
   * align the address to a page boundary and adjust the size accordingly
   */
  pageoff = (addr & PAGE_MASK);
  addr -= pageoff;
  size += pageoff;
  size = (vm_size_t) round_page(size);

  /* disallow wrap-around. */
  if (addr + (int)size < addr)
    return (EINVAL);

  if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
    return (EAGAIN);

#ifdef pmap_wired_count
  if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
      p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
    return (EAGAIN);
#else
  if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    return (error);
#endif

  error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
  return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int sys_munlock(p, v, retval)

struct proc *p;
void *v;
register_t *retval;

{
  struct sys_munlock_args /* {
			     syscallarg(caddr_t) addr;
			     syscallarg(size_t) len;
			     } */ *uap = v;
  vm_offset_t addr;
  vm_size_t size, pageoff;
  int error;

  /*
   * extract syscall args from uap
   */

  addr = (vm_offset_t)SCARG(uap, addr);
  size = (vm_size_t)SCARG(uap, len);

  /*
   * align the address to a page boundary, and adjust the size accordingly
   */
  pageoff = (addr & PAGE_MASK);
  addr -= pageoff;
  size += pageoff;
  size = (vm_size_t) round_page(size);

  /* disallow wrap-around. */
  if (addr + (int)size < addr)
    return (EINVAL);

#ifndef pmap_wired_count
  if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    return (error);
#endif

  error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
  return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
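
/*
 * sketch of a typical call, as made by sys_mmap above (an anonymous,
 * private copy-on-write mapping of one page at a kernel-chosen
 * address; the values here are illustrative only):
 *
 *	vm_offset_t va = 0;
 *	error = uvm_mmap(&p->p_vmspace->vm_map, &va, PAGE_SIZE,
 *	    VM_PROT_READ|VM_PROT_WRITE, VM_PROT_ALL,
 *	    MAP_ANON|MAP_PRIVATE, NULL, 0);
 */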

int uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)

vm_map_t map;
vm_offset_t *addr;
vm_size_t size;
vm_prot_t prot, maxprot;
int flags;
caddr_t handle;		/* XXX: VNODE? */
vm_offset_t foff;

{
  struct uvm_object *uobj;
  struct vnode *vp;
  int retval;
  int advice = UVM_ADV_NORMAL;
  uvm_flag_t uvmflag = 0;

  /*
   * check params
   */

  if (size == 0)
    return(0);
  if (foff & PAGE_MASK)
    return(EINVAL);
  if ((prot & maxprot) != prot)
    return(EINVAL);

  /*
   * for non-fixed mappings, round off the suggested address.
   * for fixed mappings, check alignment and zap old mappings.
   */

  if ((flags & MAP_FIXED) == 0) {

    *addr = round_page(*addr);	/* round */

  } else {

    if (*addr & PAGE_MASK)
      return(EINVAL);
    uvmflag |= UVM_FLAG_FIXED;
    (void) uvm_unmap(map, *addr, *addr + size, 0);	/* zap! */
  }

  /*
   * handle anon vs. non-anon mappings.   for non-anon mappings attach
   * to underlying vm object.
   */

  if (flags & MAP_ANON) {

    foff = UVM_UNKNOWN_OFFSET;
    uobj = NULL;
    if ((flags & MAP_SHARED) == 0)
      uvmflag |= UVM_FLAG_COPYONW; /* XXX: defer amap create */
    else
      uvmflag |= UVM_FLAG_OVERLAY; /* shared: create amap now */

  } else {

    vp = (struct vnode *) handle;	/* get vnode */
    if (vp->v_type != VCHR) {
      uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ? maxprot :
			(maxprot & ~VM_PROT_WRITE));

      /*
       * XXXCDC: hack from old code
       * don't allow vnodes which have been mapped shared-writeable to
       * persist [forces them to be flushed out when last reference goes].
       *
       * XXXCDC: interesting side effect: avoids a bug.   note that in
       * WRITE [ufs_readwrite.c] we allocate the buffer, uncache, and
       * then do the write.   the problem with this is that if the uncache
       * causes VM data to be flushed to the same area of the file we
       * are writing to... in that case we've got the buffer locked and
       * our process goes to sleep forever.
       *
       * XXXCDC: checking maxprot protects us from the "persistbug"
       * program but this is not a long term solution.
       *
       * XXXCDC: we don't bother calling uncache with the vp VOP_LOCKed
       * since we know that we are already holding a valid reference to
       * the uvn (from the uvn_attach above), and thus it is impossible
       * for the uncache to kill the uvn and trigger I/O.
       */
      if (flags & MAP_SHARED) {
	if ((prot & VM_PROT_WRITE) || (maxprot & VM_PROT_WRITE)) {
	  uvm_vnp_uncache(vp);
        }
      }

    } else {
      uobj = udv_attach((void *) &vp->v_rdev, (flags & MAP_SHARED) ? maxprot :
			(maxprot & ~VM_PROT_WRITE));
      advice = UVM_ADV_RANDOM;
    }

    if (uobj == NULL)
      return((vp->v_type == VCHR) ? EINVAL : ENOMEM);

    if ((flags & MAP_SHARED) == 0)
      uvmflag |= UVM_FLAG_COPYONW;
  }

  /*
   * set up mapping flags
   */

  uvmflag = UVM_MAPFLAG(prot, maxprot,
			(flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
			advice, uvmflag);

  /*
   * do it!
   */

  retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

  if (retval == KERN_SUCCESS)
    return(0);

  /*
   * errors: first detach from the uobj, if any.
   */

  if (uobj)
    uobj->pgops->pgo_detach(uobj);

  switch (retval) {
  case KERN_INVALID_ADDRESS:
  case KERN_NO_SPACE:
    return(ENOMEM);
  case KERN_PROTECTION_FAILURE:
    return(EACCES);
  }
  return(EINVAL);
}