Home | History | Annotate | Line # | Download | only in prekern
mm.c revision 1.22
      1 /*	$NetBSD: mm.c,v 1.22 2018/06/20 11:49:37 maxv Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Maxime Villard.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "prekern.h"
     32 
/* Alignment used by mm_shift_segment() when the caller passes an alignment of 0. */
#define ELFROUND	64

/*
 * Fill byte for the unused space around a segment, indexed by segment type
 * (see mm_map_segment()). Text is padded with 0xCC -- the x86 int3 opcode,
 * presumably so that stray execution in the padding traps -- and everything
 * else with zeros.
 */
static const uint8_t pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = 0xCC,
	[BTSEG_RODATA] = 0x00,
	[BTSEG_DATA] = 0x00
};
     41 
/*
 * Protection selectors. MM_PROT_READ is 0, so the combinations used in this
 * file collapse to a single index: READ|WRITE == MM_PROT_WRITE and
 * READ|EXECUTE == MM_PROT_EXECUTE.
 */
#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

/*
 * PTE permission bits for each protection selector. The table has only three
 * entries: WRITE|EXECUTE (0x03) is deliberately not a valid index.
 */
static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};
     52 
/* Layout of the mapped kernel; zeroed and then filled in by mm_map_kernel(). */
struct bootspace bootspace;

/* Physical bounds of the kernel image; defined outside this file. */
extern paddr_t kernpa_start, kernpa_end;
/* VA at which the ISA I/O memory is mapped; set by mm_map_boot(). */
vaddr_t iom_base;

/* Next free physical address handed out by mm_palloc(); set by mm_init(). */
paddr_t pa_avail = 0;
/* Scratch VA that mm_palloc() remaps onto each fresh page in order to zero it. */
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);
     60 
/*
 * Initialize the physical page allocator: first_pa becomes the first
 * physical address that mm_palloc() will hand out.
 */
void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}
     66 
     67 static void
     68 mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
     69 {
     70 	if (PTE_BASE[pl1_i(va)] & PG_V) {
     71 		fatal("mm_enter_pa: mapping already present");
     72 	}
     73 	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
     74 }
     75 
     76 static void
     77 mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
     78 {
     79 	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
     80 }
     81 
/*
 * Invalidate the TLB entry covering va (x86 invlpg). The "memory" clobber
 * keeps the compiler from moving memory accesses across the flush.
 */
static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}
     87 
     88 static paddr_t
     89 mm_palloc(size_t npages)
     90 {
     91 	paddr_t pa;
     92 	size_t i;
     93 
     94 	/* Allocate the physical pages */
     95 	pa = pa_avail;
     96 	pa_avail += npages * PAGE_SIZE;
     97 
     98 	/* Zero them out */
     99 	for (i = 0; i < npages; i++) {
    100 		mm_reenter_pa(pa + i * PAGE_SIZE, tmpva,
    101 		    MM_PROT_READ|MM_PROT_WRITE);
    102 		mm_flush_va(tmpva);
    103 		memset((void *)tmpva, 0, PAGE_SIZE);
    104 	}
    105 
    106 	return pa;
    107 }
    108 
    109 static bool
    110 mm_pte_is_valid(pt_entry_t pte)
    111 {
    112 	return ((pte & PG_V) != 0);
    113 }
    114 
    115 static void
    116 mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
    117 {
    118 	size_t i, npages;
    119 	vaddr_t va;
    120 	paddr_t pa;
    121 
    122 	ASSERT(size % PAGE_SIZE == 0);
    123 	npages = size / PAGE_SIZE;
    124 
    125 	for (i = 0; i < npages; i++) {
    126 		va = startva + i * PAGE_SIZE;
    127 		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
    128 		mm_reenter_pa(pa, va, prot);
    129 		mm_flush_va(va);
    130 	}
    131 }
    132 
    133 void
    134 mm_bootspace_mprotect(void)
    135 {
    136 	pte_prot_t prot;
    137 	size_t i;
    138 
    139 	/* Remap the kernel segments with proper permissions. */
    140 	for (i = 0; i < BTSPACE_NSEGS; i++) {
    141 		if (bootspace.segs[i].type == BTSEG_TEXT) {
    142 			prot = MM_PROT_READ|MM_PROT_EXECUTE;
    143 		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
    144 			prot = MM_PROT_READ;
    145 		} else {
    146 			continue;
    147 		}
    148 		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
    149 	}
    150 
    151 	print_state(true, "Segments protection updated");
    152 }
    153 
    154 static size_t
    155 mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
    156 {
    157 	size_t npages;
    158 
    159 	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
    160 	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
    161 	return (npages / (pgsz / PAGE_SIZE));
    162 }
    163 
    164 static void
    165 mm_map_tree(vaddr_t startva, vaddr_t endva)
    166 {
    167 	size_t i, nL4e, nL3e, nL2e;
    168 	size_t L4e_idx, L3e_idx, L2e_idx;
    169 	paddr_t pa;
    170 
    171 	/* Build L4. */
    172 	L4e_idx = pl4_i(startva);
    173 	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
    174 	ASSERT(L4e_idx == 511);
    175 	ASSERT(nL4e == 1);
    176 	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
    177 		pa = mm_palloc(1);
    178 		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
    179 	}
    180 
    181 	/* Build L3. */
    182 	L3e_idx = pl3_i(startva);
    183 	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
    184 	for (i = 0; i < nL3e; i++) {
    185 		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
    186 			continue;
    187 		}
    188 		pa = mm_palloc(1);
    189 		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
    190 	}
    191 
    192 	/* Build L2. */
    193 	L2e_idx = pl2_i(startva);
    194 	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
    195 	for (i = 0; i < nL2e; i++) {
    196 		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
    197 			continue;
    198 		}
    199 		pa = mm_palloc(1);
    200 		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
    201 	}
    202 }
    203 
    204 static vaddr_t
    205 mm_randva_kregion(size_t size, size_t pagesz)
    206 {
    207 	vaddr_t sva, eva;
    208 	vaddr_t randva;
    209 	uint64_t rnd;
    210 	size_t i;
    211 	bool ok;
    212 
    213 	while (1) {
    214 		prng_get_rand(&rnd, sizeof(rnd));
    215 		randva = rounddown(KASLR_WINDOW_BASE +
    216 		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);
    217 
    218 		/* Detect collisions */
    219 		ok = true;
    220 		for (i = 0; i < BTSPACE_NSEGS; i++) {
    221 			if (bootspace.segs[i].type == BTSEG_NONE) {
    222 				continue;
    223 			}
    224 			sva = bootspace.segs[i].va;
    225 			eva = sva + bootspace.segs[i].sz;
    226 
    227 			if ((sva <= randva) && (randva < eva)) {
    228 				ok = false;
    229 				break;
    230 			}
    231 			if ((sva < randva + size) && (randva + size <= eva)) {
    232 				ok = false;
    233 				break;
    234 			}
    235 			if (randva < sva && eva < (randva + size)) {
    236 				ok = false;
    237 				break;
    238 			}
    239 		}
    240 		if (ok) {
    241 			break;
    242 		}
    243 	}
    244 
    245 	mm_map_tree(randva, randva + size);
    246 
    247 	return randva;
    248 }
    249 
    250 static paddr_t
    251 bootspace_getend(void)
    252 {
    253 	paddr_t pa, max = 0;
    254 	size_t i;
    255 
    256 	for (i = 0; i < BTSPACE_NSEGS; i++) {
    257 		if (bootspace.segs[i].type == BTSEG_NONE) {
    258 			continue;
    259 		}
    260 		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
    261 		if (pa > max)
    262 			max = pa;
    263 	}
    264 
    265 	return max;
    266 }
    267 
    268 static void
    269 bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
    270 {
    271 	size_t i;
    272 
    273 	for (i = 0; i < BTSPACE_NSEGS; i++) {
    274 		if (bootspace.segs[i].type == BTSEG_NONE) {
    275 			bootspace.segs[i].type = type;
    276 			bootspace.segs[i].va = va;
    277 			bootspace.segs[i].pa = pa;
    278 			bootspace.segs[i].sz = sz;
    279 			return;
    280 		}
    281 	}
    282 
    283 	fatal("bootspace_addseg: segments full");
    284 }
    285 
    286 static size_t
    287 mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
    288 {
    289 	size_t shiftsize, offset;
    290 	uint64_t rnd;
    291 
    292 	if (elfalign == 0) {
    293 		elfalign = ELFROUND;
    294 	}
    295 
    296 	ASSERT(pagesz >= elfalign);
    297 	ASSERT(pagesz % elfalign == 0);
    298 	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
    299 	if (shiftsize == 0) {
    300 		return 0;
    301 	}
    302 
    303 	prng_get_rand(&rnd, sizeof(rnd));
    304 	offset = roundup(rnd % shiftsize, elfalign);
    305 	ASSERT((va + offset) % elfalign == 0);
    306 
    307 	memmove((void *)(va + offset), (void *)va, elfsz);
    308 
    309 	return offset;
    310 }
    311 
    312 static void
    313 mm_map_head(void)
    314 {
    315 	size_t i, npages, size;
    316 	uint64_t rnd;
    317 	vaddr_t randva;
    318 
    319 	/*
    320 	 * To get the size of the head, we give a look at the read-only
    321 	 * mapping of the kernel we created in locore. We're identity mapped,
    322 	 * so kernpa = kernva.
    323 	 */
    324 	size = elf_get_head_size((vaddr_t)kernpa_start);
    325 	npages = size / PAGE_SIZE;
    326 
    327 	prng_get_rand(&rnd, sizeof(rnd));
    328 	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
    329 	    PAGE_SIZE);
    330 	mm_map_tree(randva, randva + size);
    331 
    332 	/* Enter the area and build the ELF info */
    333 	for (i = 0; i < npages; i++) {
    334 		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
    335 		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
    336 	}
    337 	elf_build_head(randva);
    338 
    339 	/* Register the values in bootspace */
    340 	bootspace.head.va = randva;
    341 	bootspace.head.pa = kernpa_start;
    342 	bootspace.head.sz = size;
    343 }
    344 
    345 vaddr_t
    346 mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
    347 {
    348 	size_t i, npages, size, pagesz, offset;
    349 	vaddr_t randva;
    350 	char pad;
    351 
    352 	if (elfsz <= PAGE_SIZE) {
    353 		pagesz = NBPD_L1;
    354 	} else {
    355 		pagesz = NBPD_L2;
    356 	}
    357 
    358 	size = roundup(elfsz, pagesz);
    359 	randva = mm_randva_kregion(size, pagesz);
    360 
    361 	npages = size / PAGE_SIZE;
    362 	for (i = 0; i < npages; i++) {
    363 		mm_enter_pa(pa + i * PAGE_SIZE,
    364 		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
    365 	}
    366 
    367 	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
    368 	ASSERT(offset + elfsz <= size);
    369 
    370 	pad = pads[segtype];
    371 	memset((void *)randva, pad, offset);
    372 	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);
    373 
    374 	bootspace_addseg(segtype, randva, pa, size);
    375 
    376 	return (randva + offset);
    377 }
    378 
/*
 * Map the "boot" region: the physical memory between the end of the last
 * kernel segment and pa_avail, followed by the ISA I/O memory. Also sets
 * iom_base and fills bootspace.boot plus the bootspace fields whose values
 * are derived from the boot region.
 */
static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/*
	 * Enter the area and build the ELF info.
	 *
	 * This must come after mm_randva_kregion(): creating the page tree
	 * allocates pages through mm_palloc(), which advances pa_avail, and
	 * those pages have to be part of the range mapped here.
	 */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM, right after the pages mapped above */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace; sz covers the boot pages plus IOM */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/*
	 * Initialize the values that are located in the "boot" region.
	 * spareva and emodule both sit NKL2_KIMG_ENTRIES L2 slots after the
	 * start of the region; the page tree created above covers one more
	 * L2 slot than that. smodule starts right after the I/O memory.
	 */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.smodule = (vaddr_t)iom_base + IOM_SIZE;
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}
    426 
/*
 * Map the whole kernel.
 *
 * There is a variable number of independent regions: one head, several kernel
 * segments, one boot. They are all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions. Head must therefore be mapped *before* the
 * other regions. Boot is mapped last.
 *
 * bootspace is zeroed on entry; at the end of this function it is fully
 * constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}
    448