/*	$NetBSD: uvm_mmap.c,v 1.9 1998/05/10 12:35:59 mrg Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *	Washington University, University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>


/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
			  syscallarg(int) incr;
			  } */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
			  syscallarg(int) incr;
			  } */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_madvise_args /* {
			     syscallarg(caddr_t) addr;
			     syscallarg(size_t) len;
			     syscallarg(int) behav;
			     } */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_mincore_args /* {
			     syscallarg(caddr_t) addr;
			     syscallarg(size_t) len;
			     syscallarg(char *) vec;
			     } */ *uap = v;
#endif

	return (EOPNOTSUPP);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?   could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */
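
/*
 * Illustrative example (editor's sketch, not from the original source):
 * with PAGE_SIZE == 0x1000, a call such as
 *
 *	p = mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 0x1234);
 *
 * maps the file starting at trunc_page(0x1234) == 0x1000 and returns
 * the chosen page-aligned address plus the page offset 0x234, so that
 * *p reads the byte at file offset 0x1234 exactly.
 */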

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vm_offset_t addr;
	struct vattr va;
	off_t pos;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the new size fits within a vm_offset_t
	 * XXX: need to revise addressing data types
	 */
	if (pos + size > (vm_offset_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%x too big\n", pos, (int)size);
#endif
		return (EINVAL);
	}

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos  -= pageoff;
	size += pageoff;			/* add offset */
	size = (vm_size_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);		/* don't allow wrap */
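
	/*
	 * Worked example of the alignment above (editor's note): with
	 * PAGE_SIZE == 0x1000, pos == 0x1234 and size == 0x100 give
	 * pageoff == 0x234, a page-aligned pos of 0x1000, and
	 * size == round_page(0x334) == 0x1000, i.e. the mapping covers
	 * the whole page containing the requested range.
	 */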

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);		/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return(EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return(EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);		/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (ENODEV);	/* only REG/CHR support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			   "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			   "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, add PROT_WRITE to
			 * maxprot only if the file is neither immutable
			 * nor append-only.  otherwise, if we have asked
			 * for PROT_WRITE, return EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			}
			else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/* MAP_PRIVATE mappings can always be written to */
			maxprot |= VM_PROT_WRITE;
		}

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */

		if (fd != -1)
			return (EINVAL);

is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;
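
	/*
	 * Editor's note on the checks above: at least one of MS_ASYNC,
	 * MS_SYNC, or MS_INVALIDATE must be given, MS_ASYNC and MS_SYNC
	 * are mutually exclusive, and MS_INVALIDATE alone defaults to
	 * MS_SYNC.  For example:
	 *
	 *	MS_SYNC				ok
	 *	MS_ASYNC | MS_INVALIDATE	ok
	 *	MS_INVALIDATE			ok (MS_SYNC is added)
	 *	MS_ASYNC | MS_SYNC		EINVAL
	 *	0				EINVAL
	 */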

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	 /* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return(0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}
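
/*
 * Illustrative userland usage (editor's sketch, not from this file):
 * flushing a dirty shared file mapping back to disk.
 *
 *	char *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
 *	p[0] = 1;
 *	if (msync(p, len, MS_SYNC) == -1)
 *		err(1, "msync");
 */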

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;
	vm_offset_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vm_offset_t) SCARG(uap, addr);
	size = (vm_size_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;

	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	/*
	 * doit!
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, 0, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}
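
/*
 * Editor's note: this munmap() fails with EINVAL unless the entire
 * [addr, addr+size) range is currently mapped (the uvm_map_checkprot()
 * call above), so a request spanning an unmapped hole is rejected
 * rather than having the hole silently skipped.
 */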

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;
	int rv;

	/*
	 * extract syscall args from uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if ((int)size < 0)
		return (EINVAL);

	/*
	 * doit
	 */

	rv = uvm_map_protect(&p->p_vmspace->vm_map,
			   addr, addr+size, prot, FALSE);

	if (rv == KERN_SUCCESS)
		return (0);
	if (rv == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}
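
/*
 * Illustrative userland usage (editor's sketch): temporarily revoking
 * write access to a mapped region.  A request for protection beyond
 * what the region was mapped with fails with EACCES
 * (KERN_PROTECTION_FAILURE above).
 *
 *	if (mprotect(p, len, PROT_READ) == -1)
 *		err(1, "mprotect");
 *	...
 *	if (mprotect(p, len, PROT_READ|PROT_WRITE) == -1)
 *		err(1, "mprotect");
 */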

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_minherit_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);

	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
			 inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
			p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
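
/*
 * Editor's note on the limit checks above: atop() converts the byte
 * count to pages, so the request is refused with EAGAIN once the
 * global wired-page ceiling (uvmexp.wiredmax) would be exceeded.  On
 * ports that define pmap_wired_count the per-process RLIMIT_MEMLOCK
 * byte limit is enforced; elsewhere mlock() requires superuser
 * privilege instead.
 */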

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
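
/*
 * Illustrative kernel-internal call (editor's sketch, not from this
 * file): mapping an anonymous region much as exec or sysv shm might.
 *
 *	vm_offset_t va = 0;
 *	error = uvm_mmap(&p->p_vmspace->vm_map, &va, round_page(len),
 *	    VM_PROT_READ|VM_PROT_WRITE, VM_PROT_ALL, MAP_ANON|MAP_SHARED,
 *	    NULL, 0);
 *
 * handle is NULL for anonymous memory and a vnode pointer otherwise;
 * foff must already be page aligned, as noted above.
 */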

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vm_offset_t *addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vm_offset_t foff;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size, 0);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			   maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] we
			 * allocate a buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VCHR) ? EINVAL : ENOMEM);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
			(flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
			advice, uvmflag);

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return(0);

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return(ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return(EACCES);
	}
	return(EINVAL);
}