Home | History | Annotate | Line # | Download | only in dev
mm.c revision 1.23
      1 /*	$NetBSD: mm.c,v 1.23 2018/12/05 18:16:51 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2002, 2008, 2010 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Christos Zoulas, Joerg Sonnenberger and Mindaugas Rasiukevicius.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
/*
 * Special /dev/{mem,kmem,zero,null,full} memory devices.
 */
     35 
     36 #include <sys/cdefs.h>
     37 __KERNEL_RCSID(0, "$NetBSD: mm.c,v 1.23 2018/12/05 18:16:51 christos Exp $");
     38 
     39 #include "opt_compat_netbsd.h"
     40 
     41 #include <sys/param.h>
     42 #include <sys/conf.h>
     43 #include <sys/ioctl.h>
     44 #include <sys/mman.h>
     45 #include <sys/uio.h>
     46 #include <sys/termios.h>
     47 
     48 #include <dev/mm.h>
     49 
     50 #include <uvm/uvm_extern.h>
     51 
     52 static void *		dev_zero_page	__read_mostly;
     53 static kmutex_t		dev_mem_lock	__cacheline_aligned;
     54 static vaddr_t		dev_mem_addr	__read_mostly;
     55 
     56 static dev_type_open(mm_open);
     57 static dev_type_read(mm_readwrite);
     58 static dev_type_ioctl(mm_ioctl);
     59 static dev_type_mmap(mm_mmap);
     60 static dev_type_ioctl(mm_ioctl);
     61 
     62 const struct cdevsw mem_cdevsw = {
     63 	.d_open = mm_open,
     64 	.d_close = nullclose,
     65 	.d_read = mm_readwrite,
     66 	.d_write = mm_readwrite,
     67 	.d_ioctl = mm_ioctl,
     68 	.d_stop = nostop,
     69 	.d_tty = notty,
     70 	.d_poll = nopoll,
     71 	.d_mmap = mm_mmap,
     72 	.d_kqfilter = nokqfilter,
     73 	.d_discard = nodiscard,
     74 	.d_flag = D_MPSAFE
     75 };
     76 
#ifdef pmax	/* XXX */
/*
 * Second device switch, built for pmax only.  It matches mem_cdevsw
 * except that open goes through nullopen instead of mm_open.
 */
const struct cdevsw mem_ultrix_cdevsw = {
	/* Handlers implemented in this file. */
	.d_read = mm_readwrite,
	.d_write = mm_readwrite,
	.d_ioctl = mm_ioctl,
	.d_mmap = mm_mmap,

	/* Generic handlers. */
	.d_open = nullopen,
	.d_close = nullclose,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,

	.d_flag = D_MPSAFE,
};
#endif
     93 
     94 static int
     95 mm_open(dev_t dev, int flag, int mode, struct lwp *l)
     96 {
     97 #ifdef __HAVE_MM_MD_OPEN
     98 	int error;
     99 	if ((error = mm_md_open(dev, flag, mode, l)) != 0)
    100 		return error;
    101 #endif
    102 	l->l_proc->p_flag |= PK_KMEM;
    103 	return 0;
    104 }
    105 
    106 /*
    107  * mm_init: initialize memory device driver.
    108  */
    109 void
    110 mm_init(void)
    111 {
    112 	vaddr_t pg;
    113 
    114 	mutex_init(&dev_mem_lock, MUTEX_DEFAULT, IPL_NONE);
    115 
    116 	/* Read-only zero-page. */
    117 	pg = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
    118 	KASSERT(pg != 0);
    119 	pmap_protect(pmap_kernel(), pg, pg + PAGE_SIZE, VM_PROT_READ);
    120 	pmap_update(pmap_kernel());
    121 	dev_zero_page = (void *)pg;
    122 
    123 #ifndef __HAVE_MM_MD_CACHE_ALIASING
    124 	/* KVA for mappings during I/O. */
    125 	dev_mem_addr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
    126 	    UVM_KMF_VAONLY|UVM_KMF_WAITVA);
    127 	KASSERT(dev_mem_addr != 0);
    128 #else
    129 	dev_mem_addr = 0;
    130 #endif
    131 }
    132 
    133 
    134 /*
    135  * dev_mem_getva: get a special virtual address.  If architecture requires,
    136  * allocate VA according to PA, which avoids cache-aliasing issues.  Use a
    137  * constant, general mapping address otherwise.
    138  */
    139 static inline vaddr_t
    140 dev_mem_getva(paddr_t pa, int color)
    141 {
    142 #ifdef __HAVE_MM_MD_CACHE_ALIASING
    143 	return uvm_km_alloc(kernel_map, PAGE_SIZE,
    144 	    color & uvmexp.colormask,
    145 	    UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH);
    146 #else
    147 	return dev_mem_addr;
    148 #endif
    149 }
    150 
    151 static inline void
    152 dev_mem_relva(paddr_t pa, vaddr_t va)
    153 {
    154 #ifdef __HAVE_MM_MD_CACHE_ALIASING
    155 	uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY);
    156 #else
    157 	KASSERT(dev_mem_addr == va);
    158 #endif
    159 }
    160 
    161 /*
    162  * dev_kmem_readwrite: helper for DEV_MEM (/dev/mem) case of R/W.
    163  */
    164 static int
    165 dev_mem_readwrite(struct uio *uio, struct iovec *iov)
    166 {
    167 	paddr_t paddr;
    168 	vaddr_t vaddr;
    169 	vm_prot_t prot;
    170 	size_t len, offset;
    171 	bool have_direct;
    172 	int error;
    173 	int color = 0;
    174 
    175 	/* Check for wrap around. */
    176 	if ((uintptr_t)uio->uio_offset != uio->uio_offset) {
    177 		return EFAULT;
    178 	}
    179 	paddr = uio->uio_offset & ~PAGE_MASK;
    180 	prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
    181 	error = mm_md_physacc(paddr, prot);
    182 	if (error) {
    183 		return error;
    184 	}
    185 	offset = uio->uio_offset & PAGE_MASK;
    186 	len = MIN(uio->uio_resid, PAGE_SIZE - offset);
    187 
    188 #ifdef __HAVE_MM_MD_CACHE_ALIASING
    189 	have_direct = mm_md_page_color(paddr, &color);
    190 #else
    191 	have_direct = true;
    192 	color = 0;
    193 #endif
    194 
    195 #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
    196 	/* Is physical address directly mapped?  Return VA. */
    197 	if (have_direct)
    198 		have_direct = mm_md_direct_mapped_phys(paddr, &vaddr);
    199 #else
    200 	vaddr = 0;
    201 	have_direct = false;
    202 #endif
    203 	if (!have_direct) {
    204 		/* Get a special virtual address. */
    205 		const vaddr_t va = dev_mem_getva(paddr, color);
    206 
    207 		/* Map selected KVA to physical address. */
    208 		mutex_enter(&dev_mem_lock);
    209 		pmap_kenter_pa(va, paddr, prot, 0);
    210 		pmap_update(pmap_kernel());
    211 
    212 		/* Perform I/O. */
    213 		vaddr = va + offset;
    214 		error = uiomove((void *)vaddr, len, uio);
    215 
    216 		/* Unmap, flush before unlock. */
    217 		pmap_kremove(va, PAGE_SIZE);
    218 		pmap_update(pmap_kernel());
    219 		mutex_exit(&dev_mem_lock);
    220 
    221 		/* "Release" the virtual address. */
    222 		dev_mem_relva(paddr, va);
    223 	} else {
    224 		/* Direct map, just perform I/O. */
    225 		vaddr += offset;
    226 		error = uiomove((void *)vaddr, len, uio);
    227 	}
    228 	return error;
    229 }
    230 
    231 /*
    232  * dev_kmem_readwrite: helper for DEV_KMEM (/dev/kmem) case of R/W.
    233  */
    234 static int
    235 dev_kmem_readwrite(struct uio *uio, struct iovec *iov)
    236 {
    237 	void *addr;
    238 	size_t len, offset;
    239 	vm_prot_t prot;
    240 	int error;
    241 	bool md_kva;
    242 
    243 	/* Check for wrap around. */
    244 	addr = (void *)(intptr_t)uio->uio_offset;
    245 	if ((uintptr_t)addr != uio->uio_offset) {
    246 		return EFAULT;
    247 	}
    248 	/*
    249 	 * Handle non-page aligned offset.
    250 	 * Otherwise, we operate in page-by-page basis.
    251 	 */
    252 	offset = uio->uio_offset & PAGE_MASK;
    253 	len = MIN(uio->uio_resid, PAGE_SIZE - offset);
    254 	prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
    255 
    256 	md_kva = false;
    257 
    258 #ifdef __HAVE_MM_MD_DIRECT_MAPPED_IO
    259 	paddr_t paddr;
    260 	/* MD case: is this is a directly mapped address? */
    261 	if (mm_md_direct_mapped_io(addr, &paddr)) {
    262 		/* If so, validate physical address. */
    263 		error = mm_md_physacc(paddr, prot);
    264 		if (error) {
    265 			return error;
    266 		}
    267 		md_kva = true;
    268 	}
    269 #endif
    270 	if (!md_kva) {
    271 		bool checked = false;
    272 
    273 #ifdef __HAVE_MM_MD_KERNACC
    274 		/* MD check for the address. */
    275 		error = mm_md_kernacc(addr, prot, &checked);
    276 		if (error) {
    277 			return error;
    278 		}
    279 #endif
    280 		/* UVM check for the address (unless MD indicated to not). */
    281 		if (!checked && !uvm_kernacc(addr, len, prot)) {
    282 			return EFAULT;
    283 		}
    284 	}
    285 	error = uiomove(addr, len, uio);
    286 	return error;
    287 }
    288 
    289 /*
    290  * dev_zero_readwrite: helper for DEV_ZERO (/dev/null) case of R/W.
    291  */
    292 static inline int
    293 dev_zero_readwrite(struct uio *uio, struct iovec *iov)
    294 {
    295 	size_t len;
    296 
    297 	/* Nothing to do for the write case. */
    298 	if (uio->uio_rw == UIO_WRITE) {
    299 		uio->uio_resid = 0;
    300 		return 0;
    301 	}
    302 	/*
    303 	 * Read in page-by-page basis, caller will continue.
    304 	 * Cut appropriately for a single/last-iteration cases.
    305 	 */
    306 	len = MIN(iov->iov_len, PAGE_SIZE);
    307 	return uiomove(dev_zero_page, len, uio);
    308 }
    309 
    310 /*
    311  * mm_readwrite: general memory R/W function.
    312  */
    313 static int
    314 mm_readwrite(dev_t dev, struct uio *uio, int flags)
    315 {
    316 	struct iovec *iov;
    317 	int error;
    318 
    319 #ifdef __HAVE_MM_MD_READWRITE
    320 	/* If defined - there are extra MD cases. */
    321 	switch (minor(dev)) {
    322 	case DEV_MEM:
    323 	case DEV_KMEM:
    324 	case DEV_NULL:
    325 	case DEV_ZERO:
    326 #if defined(COMPAT_16) && defined(__arm)
    327 	case _DEV_ZERO_oARM:
    328 #endif
    329 		break;
    330 	default:
    331 		return mm_md_readwrite(dev, uio);
    332 	}
    333 #endif
    334 	error = 0;
    335 	while (uio->uio_resid > 0 && error == 0) {
    336 		iov = uio->uio_iov;
    337 		if (iov->iov_len == 0) {
    338 			/* Processed; next I/O vector. */
    339 			uio->uio_iov++;
    340 			uio->uio_iovcnt--;
    341 			KASSERT(uio->uio_iovcnt >= 0);
    342 			continue;
    343 		}
    344 		/* Helper functions will process in page-by-page basis. */
    345 		switch (minor(dev)) {
    346 		case DEV_MEM:
    347 			error = dev_mem_readwrite(uio, iov);
    348 			break;
    349 		case DEV_KMEM:
    350 			error = dev_kmem_readwrite(uio, iov);
    351 			break;
    352 		case DEV_NULL:
    353 			if (uio->uio_rw == UIO_WRITE) {
    354 				uio->uio_resid = 0;
    355 			}
    356 			/* Break directly out of the loop. */
    357 			return 0;
    358 		case DEV_FULL:
    359 			if (uio->uio_rw == UIO_WRITE) {
    360 				return ENOSPC;
    361 			}
    362 			/*FALLTHROUGH*/
    363 #if defined(COMPAT_16) && defined(__arm)
    364 		case _DEV_ZERO_oARM:
    365 #endif
    366 		case DEV_ZERO:
    367 			error = dev_zero_readwrite(uio, iov);
    368 			break;
    369 		default:
    370 			error = ENXIO;
    371 			break;
    372 		}
    373 	}
    374 	return error;
    375 }
    376 
    377 /*
    378  * mm_mmap: general mmap() handler.
    379  */
    380 static paddr_t
    381 mm_mmap(dev_t dev, off_t off, int acc)
    382 {
    383 	vm_prot_t prot;
    384 
    385 #ifdef __HAVE_MM_MD_MMAP
    386 	/* If defined - there are extra mmap() MD cases. */
    387 	switch (minor(dev)) {
    388 	case DEV_MEM:
    389 	case DEV_KMEM:
    390 	case DEV_NULL:
    391 #if defined(COMPAT_16) && defined(__arm)
    392 	case _DEV_ZERO_oARM:
    393 #endif
    394 	case DEV_ZERO:
    395 		break;
    396 	default:
    397 		return mm_md_mmap(dev, off, acc);
    398 	}
    399 #endif
    400 	/*
    401 	 * /dev/null does not make sense, /dev/kmem is volatile and
    402 	 * /dev/zero is handled in mmap already.
    403 	 */
    404 	if (minor(dev) != DEV_MEM) {
    405 		return -1;
    406 	}
    407 
    408 	prot = 0;
    409 	if (acc & PROT_EXEC)
    410 		prot |= VM_PROT_EXECUTE;
    411 	if (acc & PROT_READ)
    412 		prot |= VM_PROT_READ;
    413 	if (acc & PROT_WRITE)
    414 		prot |= VM_PROT_WRITE;
    415 
    416 	/* Validate the physical address. */
    417 	if (mm_md_physacc(off, prot) != 0) {
    418 		return -1;
    419 	}
    420 	return off >> PGSHIFT;
    421 }
    422 
    423 static int
    424 mm_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    425 {
    426 
    427 	switch (cmd) {
    428 	case FIONBIO:
    429 		/* We never block anyway. */
    430 		return 0;
    431 
    432 	case FIOSETOWN:
    433 	case FIOGETOWN:
    434 	case TIOCGPGRP:
    435 	case TIOCSPGRP:
    436 	case TIOCGETA:
    437 		return ENOTTY;
    438 
    439 	case FIOASYNC:
    440 		if ((*(int *)data) == 0) {
    441 			return 0;
    442 		}
    443 		/* FALLTHROUGH */
    444 	default:
    445 		return EOPNOTSUPP;
    446 	}
    447 }
    448