mm.c revision 1.25 1 /* $NetBSD: mm.c,v 1.25 2024/08/25 11:29:38 uwe Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2008, 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christos Zoulas, Joerg Sonnenberger and Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Special /dev/{mem,kmem,zero,null} memory devices.
34 */
35
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: mm.c,v 1.25 2024/08/25 11:29:38 uwe Exp $");
38
39 #include "opt_compat_netbsd.h"
40
41 #include <sys/param.h>
42 #include <sys/conf.h>
43 #include <sys/ioctl.h>
44 #include <sys/mman.h>
45 #include <sys/uio.h>
46 #include <sys/termios.h>
47
48 #include <dev/mm.h>
49
50 #include <uvm/uvm_extern.h>
51
52 static void * dev_zero_page __read_mostly;
53 static kmutex_t dev_mem_lock __cacheline_aligned;
54 static vaddr_t dev_mem_addr __read_mostly;
55
56 static dev_type_open(mm_open);
57 static dev_type_read(mm_readwrite);
58 static dev_type_mmap(mm_mmap);
59 static dev_type_ioctl(mm_ioctl);
60
61 const struct cdevsw mem_cdevsw = {
62 .d_open = mm_open,
63 .d_close = nullclose,
64 .d_read = mm_readwrite,
65 .d_write = mm_readwrite,
66 .d_ioctl = mm_ioctl,
67 .d_stop = nostop,
68 .d_tty = notty,
69 .d_poll = nopoll,
70 .d_mmap = mm_mmap,
71 .d_kqfilter = nokqfilter,
72 .d_discard = nodiscard,
73 .d_flag = D_MPSAFE
74 };
75
#ifdef pmax	/* XXX */
/*
 * Variant of mem_cdevsw built only for pmax.  It differs from
 * mem_cdevsw only in using nullopen instead of mm_open.
 */
const struct cdevsw mem_ultrix_cdevsw = {
	/* Entry points implemented in this file. */
	.d_read = mm_readwrite,
	.d_write = mm_readwrite,
	.d_ioctl = mm_ioctl,
	.d_mmap = mm_mmap,

	/* Generic stubs. */
	.d_open = nullopen,
	.d_close = nullclose,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,

	.d_flag = D_MPSAFE,
};
#endif
92
93 static int
94 mm_open(dev_t dev, int flag, int mode, struct lwp *l)
95 {
96 #ifdef __HAVE_MM_MD_OPEN
97 int error;
98 if ((error = mm_md_open(dev, flag, mode, l)) != 0)
99 return error;
100 #endif
101 l->l_proc->p_flag |= PK_KMEM;
102 return 0;
103 }
104
105 /*
106 * mm_init: initialize memory device driver.
107 */
108 void
109 mm_init(void)
110 {
111 vaddr_t pg;
112
113 mutex_init(&dev_mem_lock, MUTEX_DEFAULT, IPL_NONE);
114
115 /* Read-only zero-page. */
116 pg = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
117 KASSERT(pg != 0);
118 pmap_protect(pmap_kernel(), pg, pg + PAGE_SIZE, VM_PROT_READ);
119 pmap_update(pmap_kernel());
120 dev_zero_page = (void *)pg;
121
122 #ifndef __HAVE_MM_MD_CACHE_ALIASING
123 /* KVA for mappings during I/O. */
124 dev_mem_addr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
125 UVM_KMF_VAONLY|UVM_KMF_WAITVA);
126 KASSERT(dev_mem_addr != 0);
127 #else
128 dev_mem_addr = 0;
129 #endif
130 }
131
132
133 /*
134 * dev_mem_getva: get a special virtual address. If architecture requires,
135 * allocate VA according to PA, which avoids cache-aliasing issues. Use a
136 * constant, general mapping address otherwise.
137 */
138 static inline vaddr_t
139 dev_mem_getva(paddr_t pa, int color)
140 {
141 #ifdef __HAVE_MM_MD_CACHE_ALIASING
142 return uvm_km_alloc(kernel_map, PAGE_SIZE,
143 color & uvmexp.colormask,
144 UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH);
145 #else
146 return dev_mem_addr;
147 #endif
148 }
149
150 static inline void
151 dev_mem_relva(paddr_t pa, vaddr_t va)
152 {
153 #ifdef __HAVE_MM_MD_CACHE_ALIASING
154 uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY);
155 #else
156 KASSERT(dev_mem_addr == va);
157 #endif
158 }
159
160 /*
161 * dev_kmem_readwrite: helper for DEV_MEM (/dev/mem) case of R/W.
162 */
163 static int
164 dev_mem_readwrite(struct uio *uio, struct iovec *iov)
165 {
166 paddr_t paddr;
167 vaddr_t vaddr;
168 vm_prot_t prot;
169 size_t len, offset;
170 bool have_direct;
171 int error;
172 int color = 0;
173
174 /* Check for wrap around. */
175 if ((uintptr_t)uio->uio_offset != uio->uio_offset) {
176 return EFAULT;
177 }
178 paddr = uio->uio_offset & ~PAGE_MASK;
179 prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
180 error = mm_md_physacc(paddr, prot);
181 if (error) {
182 return error;
183 }
184 offset = uio->uio_offset & PAGE_MASK;
185 len = MIN(uio->uio_resid, PAGE_SIZE - offset);
186
187 #ifdef __HAVE_MM_MD_CACHE_ALIASING
188 have_direct = mm_md_page_color(paddr, &color);
189 #else
190 have_direct = true;
191 color = 0;
192 #endif
193
194 #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
195 /* Is physical address directly mapped? Return VA. */
196 if (have_direct)
197 have_direct = mm_md_direct_mapped_phys(paddr, &vaddr);
198 #else
199 vaddr = 0;
200 have_direct = false;
201 #endif
202 if (!have_direct) {
203 /* Get a special virtual address. */
204 const vaddr_t va = dev_mem_getva(paddr, color);
205
206 /* Map selected KVA to physical address. */
207 mutex_enter(&dev_mem_lock);
208 pmap_kenter_pa(va, paddr, prot, 0);
209 pmap_update(pmap_kernel());
210
211 /* Perform I/O. */
212 vaddr = va + offset;
213 error = uiomove((void *)vaddr, len, uio);
214
215 /* Unmap, flush before unlock. */
216 pmap_kremove(va, PAGE_SIZE);
217 pmap_update(pmap_kernel());
218 mutex_exit(&dev_mem_lock);
219
220 /* "Release" the virtual address. */
221 dev_mem_relva(paddr, va);
222 } else {
223 /* Direct map, just perform I/O. */
224 vaddr += offset;
225 error = uiomove((void *)vaddr, len, uio);
226 }
227 return error;
228 }
229
230 /*
231 * dev_kmem_readwrite: helper for DEV_KMEM (/dev/kmem) case of R/W.
232 */
233 static int
234 dev_kmem_readwrite(struct uio *uio, struct iovec *iov)
235 {
236 void *addr;
237 size_t len, offset;
238 vm_prot_t prot;
239 int error;
240 bool md_kva;
241
242 /* Check for wrap around. */
243 addr = (void *)(intptr_t)uio->uio_offset;
244 if ((uintptr_t)addr != uio->uio_offset) {
245 return EFAULT;
246 }
247 /*
248 * Handle non-page aligned offset.
249 * Otherwise, we operate in page-by-page basis.
250 */
251 offset = uio->uio_offset & PAGE_MASK;
252 len = MIN(uio->uio_resid, PAGE_SIZE - offset);
253 prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
254
255 md_kva = false;
256
257 #ifdef __HAVE_MM_MD_DIRECT_MAPPED_IO
258 paddr_t paddr;
259 /* MD case: is this is a directly mapped address? */
260 if (mm_md_direct_mapped_io(addr, &paddr)) {
261 /* If so, validate physical address. */
262 error = mm_md_physacc(paddr, prot);
263 if (error) {
264 return error;
265 }
266 md_kva = true;
267 }
268 #endif
269 if (!md_kva) {
270 bool checked = false;
271
272 #ifdef __HAVE_MM_MD_KERNACC
273 /* MD check for the address. */
274 error = mm_md_kernacc(addr, prot, &checked);
275 if (error) {
276 return error;
277 }
278 #endif
279 /* UVM check for the address (unless MD indicated to not). */
280 if (!checked && !uvm_kernacc(addr, len, prot)) {
281 return EFAULT;
282 }
283 }
284 error = uiomove(addr, len, uio);
285 return error;
286 }
287
288 /*
289 * dev_zero_readwrite: helper for DEV_ZERO (/dev/null) case of R/W.
290 */
291 static inline int
292 dev_zero_readwrite(struct uio *uio, struct iovec *iov)
293 {
294 size_t len;
295
296 /* Nothing to do for the write case. */
297 if (uio->uio_rw == UIO_WRITE) {
298 uio->uio_resid = 0;
299 return 0;
300 }
301 /*
302 * Read in page-by-page basis, caller will continue.
303 * Cut appropriately for a single/last-iteration cases.
304 */
305 len = MIN(iov->iov_len, PAGE_SIZE);
306 return uiomove(dev_zero_page, len, uio);
307 }
308
309 /*
310 * mm_readwrite: general memory R/W function.
311 */
312 static int
313 mm_readwrite(dev_t dev, struct uio *uio, int flags)
314 {
315 struct iovec *iov;
316 int error;
317
318 #ifdef __HAVE_MM_MD_READWRITE
319 /* If defined - there are extra MD cases. */
320 switch (minor(dev)) {
321 case DEV_MEM:
322 case DEV_KMEM:
323 case DEV_NULL:
324 case DEV_ZERO:
325 #if defined(COMPAT_16) && defined(__arm)
326 case _DEV_ZERO_oARM:
327 #endif
328 break;
329 default:
330 return mm_md_readwrite(dev, uio);
331 }
332 #endif
333 error = 0;
334 while (uio->uio_resid > 0 && error == 0) {
335 iov = uio->uio_iov;
336 if (iov->iov_len == 0) {
337 /* Processed; next I/O vector. */
338 uio->uio_iov++;
339 uio->uio_iovcnt--;
340 KASSERT(uio->uio_iovcnt >= 0);
341 continue;
342 }
343 /* Helper functions will process in page-by-page basis. */
344 switch (minor(dev)) {
345 case DEV_MEM:
346 error = dev_mem_readwrite(uio, iov);
347 break;
348 case DEV_KMEM:
349 error = dev_kmem_readwrite(uio, iov);
350 break;
351 case DEV_NULL:
352 if (uio->uio_rw == UIO_WRITE) {
353 uio->uio_resid = 0;
354 }
355 /* Break directly out of the loop. */
356 return 0;
357 case DEV_FULL:
358 if (uio->uio_rw == UIO_WRITE) {
359 return ENOSPC;
360 }
361 #if defined(COMPAT_16) && defined(__arm)
362 /* FALLTHROUGH */
363 case _DEV_ZERO_oARM:
364 #endif
365 /* FALLTHROUGH */
366 case DEV_ZERO:
367 error = dev_zero_readwrite(uio, iov);
368 break;
369 default:
370 error = ENXIO;
371 break;
372 }
373 }
374 return error;
375 }
376
377 /*
378 * mm_mmap: general mmap() handler.
379 */
380 static paddr_t
381 mm_mmap(dev_t dev, off_t off, int acc)
382 {
383 vm_prot_t prot;
384
385 #ifdef __HAVE_MM_MD_MMAP
386 /* If defined - there are extra mmap() MD cases. */
387 switch (minor(dev)) {
388 case DEV_MEM:
389 case DEV_KMEM:
390 case DEV_NULL:
391 #if defined(COMPAT_16) && defined(__arm)
392 case _DEV_ZERO_oARM:
393 #endif
394 case DEV_ZERO:
395 break;
396 default:
397 return mm_md_mmap(dev, off, acc);
398 }
399 #endif
400 /*
401 * /dev/null does not make sense, /dev/kmem is volatile and
402 * /dev/zero is handled in mmap already.
403 */
404 if (minor(dev) != DEV_MEM) {
405 return -1;
406 }
407
408 prot = 0;
409 if (acc & PROT_EXEC)
410 prot |= VM_PROT_EXECUTE;
411 if (acc & PROT_READ)
412 prot |= VM_PROT_READ;
413 if (acc & PROT_WRITE)
414 prot |= VM_PROT_WRITE;
415
416 /* Validate the physical address. */
417 if (mm_md_physacc(off, prot) != 0) {
418 return -1;
419 }
420 return off >> PGSHIFT;
421 }
422
423 static int
424 mm_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
425 {
426
427 switch (cmd) {
428 case FIONBIO:
429 /* We never block anyway. */
430 return 0;
431
432 case FIOSETOWN:
433 case FIOGETOWN:
434 case TIOCGPGRP:
435 case TIOCSPGRP:
436 case TIOCGETA:
437 return ENOTTY;
438
439 case FIOASYNC:
440 if ((*(int *)data) == 0) {
441 return 0;
442 }
443 /* FALLTHROUGH */
444 default:
445 return EOPNOTSUPP;
446 }
447 }
448