/* uvm_mmap.c -- NetBSD UVM, revision 1.11 (web-viewer navigation chrome removed) */
      1 /*	$NetBSD: uvm_mmap.c,v 1.11 1998/07/07 23:22:13 thorpej Exp $	*/
      2 
      3 /*
      4  * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
      5  *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
      6  */
      7 /*
      8  * Copyright (c) 1997 Charles D. Cranor and Washington University.
      9  * Copyright (c) 1991, 1993 The Regents of the University of California.
     10  * Copyright (c) 1988 University of Utah.
     11  *
     12  * All rights reserved.
     13  *
     14  * This code is derived from software contributed to Berkeley by
     15  * the Systems Programming Group of the University of Utah Computer
     16  * Science Department.
     17  *
     18  * Redistribution and use in source and binary forms, with or without
     19  * modification, are permitted provided that the following conditions
     20  * are met:
     21  * 1. Redistributions of source code must retain the above copyright
     22  *    notice, this list of conditions and the following disclaimer.
     23  * 2. Redistributions in binary form must reproduce the above copyright
     24  *    notice, this list of conditions and the following disclaimer in the
     25  *    documentation and/or other materials provided with the distribution.
     26  * 3. All advertising materials mentioning features or use of this software
     27  *    must display the following acknowledgement:
     28  *      This product includes software developed by the Charles D. Cranor,
     29  *	Washington University, University of California, Berkeley and
     30  *	its contributors.
     31  * 4. Neither the name of the University nor the names of its contributors
     32  *    may be used to endorse or promote products derived from this software
     33  *    without specific prior written permission.
     34  *
     35  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     36  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     38  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     39  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     40  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     41  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     42  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     43  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     44  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     45  * SUCH DAMAGE.
     46  *
     47  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
     48  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
     49  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
     50  */
     51 
     52 /*
     53  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
     54  * function.
     55  */
     56 #include <sys/param.h>
     57 #include <sys/systm.h>
     58 #include <sys/file.h>
     59 #include <sys/filedesc.h>
     60 #include <sys/resourcevar.h>
     61 #include <sys/mman.h>
     62 #include <sys/mount.h>
     63 #include <sys/proc.h>
     64 #include <sys/malloc.h>
     65 #include <sys/vnode.h>
     66 #include <sys/conf.h>
     67 #include <sys/stat.h>
     68 
     69 #include <miscfs/specfs/specdev.h>
     70 
     71 #include <vm/vm.h>
     72 #include <vm/vm_page.h>
     73 #include <vm/vm_kern.h>
     74 
     75 #include <sys/syscallargs.h>
     76 
     77 #include <uvm/uvm.h>
     78 #include <uvm/uvm_device.h>
     79 #include <uvm/uvm_vnode.h>
     80 
     81 
     82 /*
     83  * unimplemented VM system calls:
     84  */
     85 
     86 /*
     87  * sys_sbrk: sbrk system call.
     88  */
     89 
     90 /* ARGSUSED */
     91 int
     92 sys_sbrk(p, v, retval)
     93 	struct proc *p;
     94 	void *v;
     95 	register_t *retval;
     96 {
     97 #if 0
     98 	struct sys_sbrk_args /* {
     99 			  syscallarg(int) incr;
    100 			  } */ *uap = v;
    101 #endif
    102 
    103 	return (EOPNOTSUPP);
    104 }
    105 
    106 /*
    107  * sys_sstk: sstk system call.
    108  */
    109 
    110 /* ARGSUSED */
    111 int
    112 sys_sstk(p, v, retval)
    113 	struct proc *p;
    114 	void *v;
    115 	register_t *retval;
    116 {
    117 #if 0
    118 	struct sys_sstk_args /* {
    119 			  syscallarg(int) incr;
    120 			  } */ *uap = v;
    121 #endif
    122 
    123 	return (EOPNOTSUPP);
    124 }
    125 
    126 /*
    127  * sys_madvise: give advice about memory usage.
    128  */
    129 
    130 /* ARGSUSED */
    131 int
    132 sys_madvise(p, v, retval)
    133 	struct proc *p;
    134 	void *v;
    135 	register_t *retval;
    136 {
    137 #if 0
    138 	struct sys_madvise_args /* {
    139 			     syscallarg(caddr_t) addr;
    140 			     syscallarg(size_t) len;
    141 			     syscallarg(int) behav;
    142 			     } */ *uap = v;
    143 #endif
    144 
    145 	return (EOPNOTSUPP);
    146 }
    147 
    148 /*
    149  * sys_mincore: determine if pages are in core or not.
    150  */
    151 
    152 /* ARGSUSED */
    153 int
    154 sys_mincore(p, v, retval)
    155 	struct proc *p;
    156 	void *v;
    157 	register_t *retval;
    158 {
    159 #if 0
    160 	struct sys_mincore_args /* {
    161 			     syscallarg(caddr_t) addr;
    162 			     syscallarg(size_t) len;
    163 			     syscallarg(char *) vec;
    164 			     } */ *uap = v;
    165 #endif
    166 
    167 	return (EOPNOTSUPP);
    168 }
    169 
    170 #if 0
    171 /*
    172  * munmapfd: unmap file descriptor
    173  *
    174  * XXX: is this acutally a useful function?   could it be useful?
    175  */
    176 
/*
 * munmapfd: clear the UF_MAPPED flag on a file descriptor.
 *
 * Note: this whole function is compiled out (#if 0 surrounds it in
 * this file).  It does NOT actually unmap anything; it only clears the
 * per-descriptor UF_MAPPED bookkeeping bit.
 *
 * => p: process whose descriptor table is updated
 * => fd: descriptor index (no range check is performed here)
 */
void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
    188 #endif
    189 
    190 /*
    191  * sys_mmap: mmap system call.
    192  *
    193  * => file offest and address may not be page aligned
    194  *    - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE
    195  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
    196  *      and the return value is adjusted up by the page offset.
    197  */
    198 
int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vm_offset_t addr;
	struct vattr va;		/* file attributes, for IMMUTABLE/APPEND check */
	off_t pos;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;	/* requested prot / ceiling on future mprotect */
	int flags, fd;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;			/* vnode pointer, or NULL for anon memory */
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;	/* silently drop unknown prot bits */
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the newsize fits within a vm_offset_t
	 * XXX: need to revise addressing data types
	 *
	 * NOTE(review): pos is a (signed) off_t while the bound is a
	 * vm_offset_t; a negative pos may not be rejected here -- TODO
	 * confirm against later revisions of this file.
	 */
	if (pos + size > (vm_offset_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%x too big\n", pos, (int)size);
#endif
		return (EINVAL);
	}

	/*
	 * align file position and save offset.  adjust size.
	 * the sub-page part of pos is remembered in pageoff and added
	 * back to the returned address at the end.
	 */

	pageoff = (pos & PAGE_MASK);
	pos  -= pageoff;
	size += pageoff;			/* add offset */
	size = (vm_size_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);			/* don't allow wrap */

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* fixed mapping must lie fully inside the user address range */
		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);		/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return(EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return(EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);		/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK)
			return (ENODEV);  /* only REG/CHR/BLK support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			   "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			   "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection.  maxprot starts with execute only
		 * and gains read/write rights based on how the file was
		 * opened and its attributes.
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable, append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			}
			else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/* MAP_PRIVATE mappings can always write to */
			maxprot |= VM_PROT_WRITE;
		}

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */

		/* anonymous mappings require fd == -1 by convention */
		if (fd != -1)
			return (EINVAL);

is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}
    414 
    415 /*
    416  * sys___msync13: the msync system call (a front-end for flush)
    417  */
    418 
int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/*
	 * sanity check flags: reject unknown bits, a zero flags word,
	 * and the mutually-exclusive MS_ASYNC|MS_SYNC combination.
	 */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
			(flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
			(flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
	  return (EINVAL);
	/* MS_INVALIDATE alone is legal; default the sync mode to MS_SYNC */
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
	  flags |= MS_SYNC;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		/* read-lock is enough: we only copy out entry bounds */
		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	 /* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return... (map the VM return code to an errno)
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return(0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}
    525 
    526 /*
    527  * sys_munmap: unmap a users memory
    528  */
    529 
int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;	/* entries detached under the lock */

	/*
	 * get syscall args...
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* NOTE(review): (int) cast truncates vm_size_t on LP64 -- historic */
	if ((int)size < 0)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;


	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	/*
	 * doit!  the removed entries are collected on dead_entries and
	 * freed after the map lock is dropped.
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, 0, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}
    604 
    605 /*
    606  * sys_mprotect: the mprotect system call
    607  */
    608 
    609 int
    610 sys_mprotect(p, v, retval)
    611 	struct proc *p;
    612 	void *v;
    613 	register_t *retval;
    614 {
    615 	struct sys_mprotect_args /* {
    616 		syscallarg(caddr_t) addr;
    617 		syscallarg(int) len;
    618 		syscallarg(int) prot;
    619 	} */ *uap = v;
    620 	vm_offset_t addr;
    621 	vm_size_t size, pageoff;
    622 	vm_prot_t prot;
    623 	int rv;
    624 
    625 	/*
    626 	 * extract syscall args from uap
    627 	 */
    628 
    629 	addr = (vm_offset_t)SCARG(uap, addr);
    630 	size = (vm_size_t)SCARG(uap, len);
    631 	prot = SCARG(uap, prot) & VM_PROT_ALL;
    632 
    633 	/*
    634 	 * align the address to a page boundary, and adjust the size accordingly
    635 	 */
    636 	pageoff = (addr & PAGE_MASK);
    637 	addr -= pageoff;
    638 	size += pageoff;
    639 	size = (vm_size_t) round_page(size);
    640 	if ((int)size < 0)
    641 		return (EINVAL);
    642 
    643 	/*
    644 	 * doit
    645 	 */
    646 
    647 	rv = uvm_map_protect(&p->p_vmspace->vm_map,
    648 			   addr, addr+size, prot, FALSE);
    649 
    650 	if (rv == KERN_SUCCESS)
    651 		return (0);
    652 	if (rv == KERN_PROTECTION_FAILURE)
    653 		return (EACCES);
    654 	return (EINVAL);
    655 }
    656 
    657 /*
    658  * sys_minherit: the minherit system call
    659  */
    660 
    661 int
    662 sys_minherit(p, v, retval)
    663 	struct proc *p;
    664 	void *v;
    665 	register_t *retval;
    666 {
    667 	struct sys_minherit_args /* {
    668 		syscallarg(caddr_t) addr;
    669 		syscallarg(int) len;
    670 		syscallarg(int) inherit;
    671 	} */ *uap = v;
    672 	vm_offset_t addr;
    673 	vm_size_t size, pageoff;
    674 	register vm_inherit_t inherit;
    675 
    676 	addr = (vm_offset_t)SCARG(uap, addr);
    677 	size = (vm_size_t)SCARG(uap, len);
    678 	inherit = SCARG(uap, inherit);
    679 	/*
    680 	 * align the address to a page boundary, and adjust the size accordingly
    681 	 */
    682 
    683 	pageoff = (addr & PAGE_MASK);
    684 	addr -= pageoff;
    685 	size += pageoff;
    686 	size = (vm_size_t) round_page(size);
    687 
    688 	if ((int)size < 0)
    689 		return (EINVAL);
    690 
    691 	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
    692 			 inherit)) {
    693 	case KERN_SUCCESS:
    694 		return (0);
    695 	case KERN_PROTECTION_FAILURE:
    696 		return (EACCES);
    697 	}
    698 	return (EINVAL);
    699 }
    700 
    701 /*
    702  * sys_mlock: memory lock
    703  */
    704 
int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	/* NOTE(review): the (int) cast truncates on LP64 -- historic code */
	if (addr + (int)size < addr)
		return (EINVAL);

	/* refuse to exceed the system-wide wired-page ceiling */
	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	/* with per-pmap wiring stats, enforce the per-process rlimit */
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
			p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	/* otherwise fall back to requiring superuser privilege */
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	/* FALSE = wire (lock) the pages in the range */
	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
    752 
    753 /*
    754  * sys_munlock: unlock wired pages
    755  */
    756 
    757 int
    758 sys_munlock(p, v, retval)
    759 	struct proc *p;
    760 	void *v;
    761 	register_t *retval;
    762 {
    763 	struct sys_munlock_args /* {
    764 		syscallarg(const void *) addr;
    765 		syscallarg(size_t) len;
    766 	} */ *uap = v;
    767 	vm_offset_t addr;
    768 	vm_size_t size, pageoff;
    769 	int error;
    770 
    771 	/*
    772 	 * extract syscall args from uap
    773 	 */
    774 
    775 	addr = (vm_offset_t)SCARG(uap, addr);
    776 	size = (vm_size_t)SCARG(uap, len);
    777 
    778 	/*
    779 	 * align the address to a page boundary, and adjust the size accordingly
    780 	 */
    781 	pageoff = (addr & PAGE_MASK);
    782 	addr -= pageoff;
    783 	size += pageoff;
    784 	size = (vm_size_t) round_page(size);
    785 
    786 	/* disallow wrap-around. */
    787 	if (addr + (int)size < addr)
    788 		return (EINVAL);
    789 
    790 #ifndef pmap_wired_count
    791 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    792 		return (error);
    793 #endif
    794 
    795 	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
    796 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
    797 }
    798 
    799 /*
    800  * uvm_mmap: internal version of mmap
    801  *
    802  * - used by sys_mmap, exec, and sysv shm
    803  * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
    804  *	sysv shm uses "named anonymous memory")
    805  * - caller must page-align the file offset
    806  */
    807 
int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vm_offset_t *addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vm_offset_t foff;
{
	struct uvm_object *uobj;	/* backing object, NULL for plain anon */
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params: empty mappings succeed trivially, the file offset
	 * must already be page-aligned (caller's job), and the requested
	 * protection must be within the allowed maximum.
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {

		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size, 0);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			/* regular/block vnode: attach a uvn object.  for
			 * private mappings the attach maxprot is stripped
			 * of write so the vnode isn't marked writable. */
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			   maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] that we
			 * allocate buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			/* character device: attach a device object and use
			 * random-access advice (device pages aren't read
			 * ahead sequentially) */
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags (folds prot/maxprot/inherit/advice and the
	 * flag bits accumulated above into one word)
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
			(flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
			advice, uvmflag);

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return(0);

	/*
	 * errors: first detach from the uobj, if any (drops the reference
	 * gained by uvn_attach/udv_attach above).
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return(ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return(EACCES);
	}
	return(EINVAL);
}
    950