/*	$NetBSD: subr_physmap.c,v 1.5 2021/09/06 20:55:08 andvar Exp $	*/

/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_physmap.c,v 1.5 2021/09/06 20:55:08 andvar Exp $");

#include <sys/param.h>
#include <sys/physmap.h>
#include <sys/kmem.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_page.h>

#include <dev/mm.h>

/*
 * This file contains support routines used to create and destroy lists of
 * physical pages from lists of pages or ranges of virtual addresses.  By
 * using these physical maps, the kernel can avoid mapping physical I/O
 * into the kernel's address space in most cases.
 */

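/*
 * Editorial sketch (not part of the original file): the typical life cycle
 * of a physmap, assuming one of the physmap_create_*() routines below has
 * already succeeded.  The PHYSMAP_EXAMPLE guard and the function name are
 * hypothetical and exist only to illustrate the interface.
 */
#ifdef PHYSMAP_EXAMPLE
static void
physmap_example_walk(physmap_t *map, size_t offset)
{
	/* Begin a read-only walk "offset" bytes into the physmap. */
	void * const cookie = physmap_map_init(map, offset, VM_PROT_READ);
	vaddr_t kva;
	size_t seglen;

	/* physmap_map() returns 0 once every segment has been visited. */
	while ((seglen = physmap_map(cookie, &kva)) != 0) {
		/* "kva" maps "seglen" contiguous bytes of this segment. */
	}

	physmap_map_fini(cookie);
	physmap_destroy(map);
}
#endif /* PHYSMAP_EXAMPLE */
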
typedef struct {
	physmap_t *pc_physmap;
	physmap_segment_t *pc_segs;
	vsize_t pc_offset;
	vsize_t pc_klen;
	vaddr_t pc_kva;
	u_int pc_nsegs;
	vm_prot_t pc_prot;
	bool pc_direct_mapped;
} physmap_cookie_t;

/*
 * Allocate a physmap structure that requires "maxsegs" segments.
 */
static physmap_t *
physmap_alloc(size_t maxsegs)
{
	const size_t mapsize = offsetof(physmap_t, pm_segs[maxsegs]);

	KASSERT(maxsegs > 0);

	physmap_t * const map = kmem_zalloc(mapsize, KM_SLEEP);
	map->pm_maxsegs = maxsegs;

	return map;
}

static int
physmap_fill(physmap_t *map, pmap_t pmap, vaddr_t va, vsize_t len)
{
	size_t nsegs = map->pm_nsegs;
	physmap_segment_t *ps = &map->pm_segs[nsegs];
	vsize_t offset = va - trunc_page(va);

	if (nsegs == 0) {
		if (!pmap_extract(pmap, va, &ps->ps_addr)) {
			return EFAULT;
		}
		ps->ps_len = MIN(len, PAGE_SIZE - offset);
		if (ps->ps_len == len) {
			map->pm_nsegs = 1;
			return 0;
		}
		/*
		 * The first (partial) page is accounted for; advance past it
		 * before walking the rest of the range a page at a time.
		 */
		va += ps->ps_len;
		len -= ps->ps_len;
		offset = 0;
	} else {
		/*
		 * Back up to the last segment since we have to see if we can
		 * merge virtual addresses that are physically contiguous into
		 * as few segments as possible.
		 */
		ps--;
		nsegs--;
	}

	paddr_t lastaddr = ps->ps_addr + ps->ps_len;
	for (;;) {
		paddr_t curaddr;
		if (!pmap_extract(pmap, va, &curaddr)) {
			return EFAULT;
		}
		if (curaddr != lastaddr) {
			/*
			 * Not physically contiguous with the previous page,
			 * so start a new segment.
			 */
			ps++;
			nsegs++;
			KASSERT(nsegs < map->pm_maxsegs);
			ps->ps_addr = curaddr;
			lastaddr = curaddr;
		}
		if (offset + len > PAGE_SIZE) {
			/* Consume the rest of this page and keep going. */
			ps->ps_len += PAGE_SIZE - offset;
			lastaddr = ps->ps_addr + ps->ps_len;
			len -= PAGE_SIZE - offset;
			va += PAGE_SIZE - offset;
			offset = 0;
		} else {
			ps->ps_len += len;
			map->pm_nsegs = nsegs + 1;
			return 0;
		}
	}
}

/*
 * Create a physmap and populate it with the pages that are used to map a
 * linear range of virtual addresses.  It is assumed that uvm_vslock has been
 * called to lock these pages into memory.
 */
int
physmap_create_linear(physmap_t **map_p, const struct vmspace *vs, vaddr_t va,
	vsize_t len)
{
	const size_t maxsegs = atop(round_page(va + len) - trunc_page(va));
	physmap_t * const map = physmap_alloc(maxsegs);
	int error = physmap_fill(map, vs->vm_map.pmap, va, len);
	if (error) {
		physmap_destroy(map);
		*map_p = NULL;
		return error;
	}
	*map_p = map;
	return 0;
}
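
/*
 * Editorial sketch (not part of the original file): how a caller might pair
 * physmap_create_linear() with uvm_vslock()/uvm_vsunlock().  The guard macro,
 * the function name, and the error handling are hypothetical illustrations.
 */
#ifdef PHYSMAP_EXAMPLE
static int
physmap_example_linear(struct vmspace *vs, vaddr_t va, vsize_t len,
	physmap_t **map_p)
{
	/* Wire the pages so the extracted physical addresses stay valid. */
	int error = uvm_vslock(vs, (void *)va, len, VM_PROT_READ);
	if (error)
		return error;

	error = physmap_create_linear(map_p, vs, va, len);
	if (error) {
		uvm_vsunlock(vs, (void *)va, len);
		return error;
	}

	/*
	 * The caller keeps the range locked until it is done with the
	 * physmap, then calls physmap_destroy() and uvm_vsunlock().
	 */
	return 0;
}
#endif /* PHYSMAP_EXAMPLE */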

/*
 * Create a physmap and populate it with the pages that are contained in an
 * iovec array.  It is assumed that uvm_vslock has been called to lock these
 * pages into memory.
 */
int
physmap_create_iov(physmap_t **map_p, const struct vmspace *vs,
	struct iovec *iov, size_t iovlen)
{
	size_t maxsegs = 0;
	for (size_t i = 0; i < iovlen; i++) {
		const vaddr_t start = (vaddr_t) iov[i].iov_base;
		const vaddr_t end = start + iov[i].iov_len;
		maxsegs += atop(round_page(end) - trunc_page(start));
	}
	physmap_t * const map = physmap_alloc(maxsegs);

	for (size_t i = 0; i < iovlen; i++) {
		int error = physmap_fill(map, vs->vm_map.pmap,
		    (vaddr_t) iov[i].iov_base, iov[i].iov_len);
		if (error) {
			physmap_destroy(map);
			*map_p = NULL;
			return error;
		}
	}
	*map_p = map;
	return 0;
}

/*
 * This uses a list of vm_page structures to create a physmap.
 */
physmap_t *
physmap_create_pagelist(struct vm_page **pgs, size_t npgs)
{
	physmap_t * const map = physmap_alloc(npgs);

	physmap_segment_t *ps = map->pm_segs;

	/*
	 * Initialize the first segment.
	 */
	paddr_t lastaddr = VM_PAGE_TO_PHYS(pgs[0]);
	ps->ps_addr = lastaddr;
	ps->ps_len = PAGE_SIZE;

	for (pgs++; npgs-- > 1; pgs++) {
		/*
		 * lastaddr needs to be increased by a page.
		 */
		lastaddr += PAGE_SIZE;
		paddr_t curaddr = VM_PAGE_TO_PHYS(*pgs);
		if (curaddr != lastaddr) {
			/*
			 * If the addresses are not the same, we need to use
			 * a new segment.  Set its address and update lastaddr.
			 */
			ps++;
			ps->ps_addr = curaddr;
			lastaddr = curaddr;
		}
		/*
		 * Increase this segment's length by a page.
		 */
		ps->ps_len += PAGE_SIZE;
	}

	map->pm_nsegs = ps + 1 - map->pm_segs;
	return map;
}

void
physmap_destroy(physmap_t *map)
{
	const size_t mapsize = offsetof(physmap_t, pm_segs[map->pm_maxsegs]);

	kmem_free(map, mapsize);
}

void *
physmap_map_init(physmap_t *map, size_t offset, vm_prot_t prot)
{
	physmap_cookie_t * const pc = kmem_zalloc(sizeof(*pc), KM_SLEEP);

	KASSERT(prot == VM_PROT_READ || prot == (VM_PROT_READ|VM_PROT_WRITE));

	pc->pc_physmap = map;
	pc->pc_segs = map->pm_segs;
	pc->pc_nsegs = map->pm_nsegs;
	pc->pc_prot = prot;
	pc->pc_klen = 0;
	pc->pc_kva = 0;
	pc->pc_direct_mapped = false;

	/*
	 * Skip to the first segment we are interested in.
	 */
	while (offset >= pc->pc_segs->ps_len) {
		offset -= pc->pc_segs->ps_len;
		pc->pc_segs++;
		pc->pc_nsegs--;
	}

	pc->pc_offset = offset;

	return pc;
}

size_t
physmap_map(void *cookie, vaddr_t *kvap)
{
	physmap_cookie_t * const pc = cookie;

	/*
	 * If there is currently a non-direct mapped KVA region allocated,
	 * free it now.
	 */
	if (pc->pc_kva != 0 && !pc->pc_direct_mapped) {
		pmap_kremove(pc->pc_kva, pc->pc_klen);
		pmap_update(pmap_kernel());
		uvm_km_free(kernel_map, pc->pc_kva, pc->pc_klen,
		    UVM_KMF_VAONLY);
	}

	/*
	 * If there are no more segments to process, return 0 indicating
	 * we are done.
	 */
	if (pc->pc_nsegs == 0) {
		return 0;
	}

	/*
	 * Get starting physical address of this segment and its length.
	 */
	paddr_t pa = pc->pc_segs->ps_addr + pc->pc_offset;
	const size_t koff = pa & PAGE_MASK;
	const size_t len = pc->pc_segs->ps_len - pc->pc_offset;

	/*
	 * Now that we have the starting offset in the page, reset to the
	 * beginning of the page.
	 */
	pa = trunc_page(pa);

	/*
	 * We are now done with this segment; advance to the next one.
	 */
	pc->pc_segs++;
	pc->pc_nsegs--;
	pc->pc_offset = 0;

	/*
	 * Find out how many pages we are mapping; the mapping starts at the
	 * page boundary below "pa", so it must also cover the "koff" bytes
	 * that precede the data within the first page.
	 */
	pc->pc_klen = round_page(koff + len);
#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
	/*
	 * Always try to direct map it since that's nearly zero cost.
	 */
	pc->pc_direct_mapped = mm_md_direct_mapped_phys(pa, &pc->pc_kva);
#endif
	if (!pc->pc_direct_mapped) {
		/*
		 * If we can't direct map it, we have to allocate some KVA
		 * so we can map it via the kernel_map.
		 */
		pc->pc_kva = uvm_km_alloc(kernel_map, pc->pc_klen,
		    atop(pa) & uvmexp.colormask,
		    UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH);
		KASSERT(pc->pc_kva != 0);

		/*
		 * Set up mappings for this segment.
		 */
		for (size_t poff = 0; poff < pc->pc_klen; poff += PAGE_SIZE) {
			pmap_kenter_pa(pc->pc_kva + poff, pa + poff,
			    pc->pc_prot, 0);
		}
		/*
		 * Make them real.
		 */
		pmap_update(pmap_kernel());
	}
	/*
	 * Return the starting KVA (including offset into the page) and
	 * the length of this segment.
	 */
	*kvap = pc->pc_kva + koff;
	return len;
}

void
physmap_map_fini(void *cookie)
{
	physmap_cookie_t * const pc = cookie;

	/*
	 * If there is currently a non-direct mapped KVA region allocated,
	 * free it now.
	 */
	if (pc->pc_kva != 0 && !pc->pc_direct_mapped) {
		pmap_kremove(pc->pc_kva, pc->pc_klen);
		pmap_update(pmap_kernel());
		uvm_km_free(kernel_map, pc->pc_kva, pc->pc_klen,
		    UVM_KMF_VAONLY);
	}

	/*
	 * Free the cookie.
	 */
	kmem_free(pc, sizeof(*pc));
}

/*
 * genio needs to zero pages past the EOF or without backing storage (think
 * sparse files).  But since we are using physmaps, there is no KVA to use
 * with memset, so we need a helper to obtain a KVA and memset the desired
 * memory.
 */
void
physmap_zero(physmap_t *map, size_t offset, size_t len)
{
	void * const cookie = physmap_map_init(map, offset,
	    VM_PROT_READ|VM_PROT_WRITE);

	for (;;) {
		vaddr_t kva;
		size_t seglen = physmap_map(cookie, &kva);
		KASSERT(seglen != 0);
		if (seglen > len)
			seglen = len;
		memset((void *)kva, 0, seglen);
		if (seglen == len)
			break;
		/* Account for what was zeroed and move to the next segment. */
		len -= seglen;
	}

	physmap_map_fini(cookie);
}
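
/*
 * Editorial sketch (not part of the original file): how genio-style code
 * might use physmap_zero() to clear the portion of the final page that lies
 * past EOF.  The guard macro, function name, and "validlen" are hypothetical.
 */
#ifdef PHYSMAP_EXAMPLE
static void
physmap_example_zero_tail(physmap_t *map, size_t validlen, size_t maplen)
{
	if (validlen < maplen) {
		/* Zero everything beyond the valid data. */
		physmap_zero(map, validlen, maplen - validlen);
	}
}
#endif /* PHYSMAP_EXAMPLE */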