/*	$NetBSD: uvm_mmap.c,v 1.17 1999/03/09 12:18:23 kleink Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *	Washington University, University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
			  syscallarg(int) incr;
			  } */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
			  syscallarg(int) incr;
			  } */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_madvise_args /* {
			     syscallarg(caddr_t) addr;
			     syscallarg(size_t) len;
			     syscallarg(int) behav;
			     } */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_mincore_args /* {
			     syscallarg(caddr_t) addr;
			     syscallarg(size_t) len;
			     syscallarg(char *) vec;
			     } */ *uap = v;
#endif

	return (ENOSYS);
}

#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * XXX: is this actually a useful function?   could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * make sure that the file offset plus the mapping size fits
	 * within a vaddr_t
	 * XXX: need to revise addressing data types
	 */
	if (pos + size > (vaddr_t)-PAGE_SIZE) {
#ifdef DEBUG
		printf("mmap: pos=%qx, size=%lx too big\n", (long long)pos,
		       (long)size);
#endif
		return (EINVAL);
	}

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos  -= pageoff;
	size += pageoff;			/* add offset */
	size = (vsize_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);			/* don't allow wrap */

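	/*
	 * Worked example of the rounding above (illustrative values,
	 * not from the original code): with PAGE_SIZE 0x1000, a call
	 * with pos = 0x12345 and len = 0x200 gives pageoff = 0x345,
	 * pos = 0x12000 and size = round_page(0x200 + 0x345) = 0x1000,
	 * so the mapping covers the whole page holding the requested
	 * bytes and the return value is later bumped up by pageoff.
	 */
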
	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);		/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
			addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return(EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return(EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);		/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK)
			return (ENODEV);  /* only REG/CHR/BLK support mmap */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			   "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			   "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, add PROT_WRITE to
			 * maxprot only if the file is neither immutable
			 * nor append-only; otherwise, if PROT_WRITE was
			 * requested, return EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			}
			else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/*
			 * MAP_PRIVATE mappings can always be written to:
			 * writes go to a private copy of the pages.
			 */
			maxprot |= VM_PROT_WRITE;
		}

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */

		if (fd != -1)
			return (EINVAL);

is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * now let the kernel-internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}
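
#if 0
/*
 * Illustrative userland sketch (not part of this file): the pageoff
 * handling above means an unaligned file offset still produces a
 * page-aligned mapping, with the returned pointer adjusted back into
 * the page.  The helper name and values are assumptions for
 * illustration only.
 */
#include <sys/mman.h>
#include <fcntl.h>

static char *
map_unaligned(path, pos, len)
	const char *path;
	off_t pos;
	size_t len;
{
	char *p;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd == -1)
		return (NULL);
	/*
	 * the kernel truncates pos down to a page boundary and returns
	 * base + (pos & PAGE_MASK), so p points at the requested byte.
	 */
	p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, pos);
	return ((p == MAP_FAILED) ? NULL : p);
}
#endif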

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	 /* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return(0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}
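
#if 0
/*
 * Illustrative userland sketch (not part of this file): under the
 * flag checks above exactly one of MS_ASYNC/MS_SYNC may be given,
 * and a zero length flushes the whole map entry containing addr.
 * The helper name is an assumption for illustration only.
 */
#include <sys/mman.h>

static int
flush_whole_entry(addr)
	void *addr;
{

	/* passing MS_ASYNC | MS_SYNC together would fail with EINVAL */
	return (msync(addr, 0, MS_SYNC));
}
#endif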

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;

	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	/*
	 * doit!
	 */
	(void) uvm_unmap_remove(map, addr, addr + size, &dead_entries);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}
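
#if 0
/*
 * Illustrative userland sketch (not part of this file): because
 * sys_munmap requires the entire range to be mapped, an unmap that
 * spans a hole fails with EINVAL under this implementation.  The
 * helper name and layout are assumptions for illustration only.
 */
#include <sys/mman.h>

static void
unmap_with_hole_fails(base, pgsz)
	char *base;		/* start of a 3-page mapping */
	size_t pgsz;
{

	(void) munmap(base + pgsz, pgsz);	/* punch a hole */
	/* this range spans the hole, so it fails with EINVAL here */
	(void) munmap(base, 3 * pgsz);
}
#endif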

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int rv;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);
	if ((int)size < 0)
		return (EINVAL);

	/*
	 * doit
	 */

	rv = uvm_map_protect(&p->p_vmspace->vm_map,
			   addr, addr+size, prot, FALSE);

	if (rv == KERN_SUCCESS)
		return (0);
	if (rv == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_minherit_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((int)size < 0)
		return (EINVAL);

	switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
			 inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
			p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
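
/*
 * Worked example for the limit checks above (illustrative numbers,
 * not from the original code): with a 4KB page size, mlock()ing 1MB
 * adds atop(0x100000) = 256 pages to the wired count; the request
 * fails with EAGAIN if that would exceed the global uvmexp.wiredmax
 * or, on ports defining pmap_wired_count, the per-process
 * RLIMIT_MEMLOCK resource limit.
 */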

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	vm_map_t map;
	vaddr_t *addr;
	vsize_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	vaddr_t foff;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {

		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {

		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			   maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] we
			 * allocate a buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}

		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
			(flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
			advice, uvmflag);

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, uvmflag);

	if (retval == KERN_SUCCESS)
		return(0);

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return(ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return(EACCES);
	}
	return(EINVAL);
}
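
#if 0
/*
 * Illustrative sketch (not part of this file): how a kernel caller
 * such as exec or sysv shm might use uvm_mmap() to set up a private
 * anonymous region, per the interface comment above.  The function
 * name and argument values are assumptions for illustration only.
 */
static int
example_anon_map(p, addrp, len)
	struct proc *p;
	vaddr_t *addrp;
	vsize_t len;
{

	/* uvm_mmap expects a page-aligned size and file offset */
	len = round_page(len);

	/* a NULL handle with MAP_ANON selects anonymous memory */
	return (uvm_mmap(&p->p_vmspace->vm_map, addrp, len,
	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL,
	    MAP_ANON | MAP_PRIVATE, NULL, 0));
}
#endif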