/*	$NetBSD: uvm_mmap.c,v 1.20 1999/05/03 09:08:28 mrg Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *	Washington University, University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>


/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?   could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if the address isn't page aligned the mapping starts at
 *      trunc_page(addr) and the return value is adjusted up by the
 *      page offset.
 */
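
/*
 * Illustrative example (assuming 4096-byte pages, PAGE_MASK == 0xfff):
 * a non-fixed mapping with pos = 0x12345 computes pageoff = 0x345,
 * maps the file starting at offset 0x12000, and returns the chosen
 * address plus 0x345, so the result points exactly at byte pos.
 */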

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the file offset plus the mapping size fits
	 * within a vaddr_t
	 * XXX: need to revise addressing data types
	 */
	if (pos + size > (vaddr_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%lx too big\n", (long long)pos,
		       (long)size);
#endif
		return (EINVAL);
	}
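
	/*
	 * Example of the check above (illustrative, assuming a 32-bit
	 * vaddr_t and 4096-byte pages): (vaddr_t)-PAGE_SIZE == 0xfffff000,
	 * so a request with pos = 0xfff00000 and size = 0x100000 is
	 * rejected here rather than wrapping past zero in the rounding
	 * arithmetic below.
	 */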

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos  -= pageoff;
	size += pageoff;			/* add offset */
	size = (vsize_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);		/* don't allow wrap */

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);		/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return(EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return(EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);	/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK)
			return (ENODEV);  /* only REG/CHR/BLK support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}
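
		/*
		 * e.g. a SunOS-style program doing
		 *	fd = open("/dev/zero", O_RDWR);
		 *	mmap(0, len, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
		 * gets zero-fill memory just as if it had passed MAP_ANON
		 * (illustrative userland sketch, not code from this file).
		 */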

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, add PROT_WRITE to
			 * maxprot only if the file is neither immutable
			 * nor append-only.  otherwise, if PROT_WRITE was
			 * asked for, return EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			} else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/*
			 * MAP_PRIVATE mappings can always be written to:
			 * writes go to a private copy of the object.
			 */
			maxprot |= VM_PROT_WRITE;
		}

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */

		if (fd != -1)
			return (EINVAL);

is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;
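
	/*
	 * Net effect of the checks above: at most one of MS_ASYNC/MS_SYNC
	 * may be set, at least one recognized flag must be given, and a
	 * request with only MS_INVALIDATE falls through to the MS_SYNC
	 * default.
	 */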

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages within the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}
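
	/*
	 * e.g. msync(addr, 0, MS_SYNC) with addr inside a mapped region
	 * flushes the whole map entry containing addr (subject to the
	 * split/coalesce caveat noted above).
	 */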

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	 /* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return(0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;


	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
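
	/*
	 * e.g. if [addr, addr + size) spans a hole left by an earlier
	 * munmap, the checkprot below fails and we return EINVAL rather
	 * than unmapping only the allocated parts.
	 */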

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	/*
	 * doit!
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int rv;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);
	if ((int)size < 0)
		return (EINVAL);

	/*
	 * doit
	 */

	rv = uvm_map_protect(&p->p_vmspace->vm_map,
			   addr, addr+size, prot, FALSE);

	if (rv == KERN_SUCCESS)
		return (0);
	if (rv == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_minherit_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);
	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);

	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
			 inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
			p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
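
/*
 * Typical call (cf. sys_mmap above):
 *
 *	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot,
 *	    maxprot, flags, handle, pos);
 */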

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vaddr_t *addr;
	vsize_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vaddr_t foff;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {

		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			   maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] we allocate
			 * a buffer, uncache, and then do the write.  the
			 * problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE), foff, size);
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
			(flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
			advice, uvmflag);

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return(0);

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return(ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return(EACCES);
	}
	return(EINVAL);
}
    947