mm.c revision 1.20
/*	$NetBSD: mm.c,v 1.20 2017/11/26 14:29:48 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define PAD_TEXT	0xCC
#define PAD_RODATA	0x00
#define PAD_DATA	0x00

#define ELFROUND	64

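/*
 * Bytes used to fill the slack around each mapped segment. Text is padded
 * with 0xCC (the x86 'int3' opcode), so a stray jump into the padding traps
 * instead of executing garbage; rodata and data are padded with zeros.
 */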
static const uint8_t pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = PAD_TEXT,
	[BTSEG_RODATA] = PAD_RODATA,
	[BTSEG_DATA] = PAD_DATA
};

#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

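/*
 * PTE protection bits for each MM_PROT_* value. The prot argument is used
 * directly as an index, so MM_PROT_READ|MM_PROT_WRITE resolves to the write
 * entry (read is 0x00). There is no write+execute entry: the prekern never
 * creates RWX mappings.
 */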
static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

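/*
 * Record the first free physical address. mm_palloc() hands out pages from
 * there, bumping pa_avail as it goes.
 */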
void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

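/*
 * Enter a single 4KB mapping of pa at va with the given protection. Fatal
 * if a mapping is already present; the page-table pages down to L1 must
 * already have been created (see mm_map_tree).
 */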
static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	if (PTE_BASE[pl1_i(va)] & PG_V) {
		fatal("mm_enter_pa: mapping already present");
	}
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

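/* Same as mm_enter_pa, but silently overwrite any existing PTE. */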
static void
mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

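/* Flush the TLB entry for va. */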
static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

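/*
 * Allocate npages consecutive physical pages from pa_avail (a simple bump
 * allocator, nothing is ever freed) and zero them through the temporary VA.
 */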
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_reenter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

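/*
 * Change the protection of an already-mapped, page-aligned range by
 * re-entering each PTE with the new bits and flushing its TLB entry.
 */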
static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_reenter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}

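/*
 * Number of page-table entries of size pgsz needed to cover [startva, endva),
 * taking alignment into account. For instance a range smaller than 2MB that
 * straddles one 2MB boundary still needs two L2 entries.
 */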
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

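/*
 * Allocate and hook up the intermediate levels of the page tree (L4, L3, L2)
 * needed to later enter L1 mappings in [startva, endva). Entries that are
 * already valid are left untouched.
 */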
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/* Build L4. */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/* Build L3. */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/* Build L2. */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

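/*
 * Pick a random, pagesz-aligned VA of the given size inside the KASLR
 * window, retrying until it does not overlap any already-registered
 * segment, then build the page tree for it.
 */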
static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		prng_get_rand(&rnd, sizeof(rnd));
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			if (randva < sva && eva < (randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

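/* Return the highest physical address used by the registered segments. */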
static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

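/* Register a segment in the first free slot of bootspace, fatal if full. */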
static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}

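/*
 * Sub-page randomization: shift the segment content forward by a random,
 * elfalign-aligned offset within the slack between its ELF size and its
 * page-rounded size. Returns the chosen offset.
 */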
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	ASSERT(pagesz >= elfalign);
	ASSERT(pagesz % elfalign == 0);
	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	prng_get_rand(&rnd, sizeof(rnd));
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}

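/*
 * Map the "head" region (the ELF Ehdr and Section Headers) at a random VA
 * inside the head window, and register it in bootspace.
 */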
static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we look at the read-only mapping of
	 * the kernel created in locore. We are identity-mapped at this
	 * point, so kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	prng_get_rand(&rnd, sizeof(rnd));
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

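/*
 * Map a kernel ELF segment (text, rodata or data) at a random VA: reserve a
 * page-rounded region, enter the physical pages, apply the sub-page shift,
 * and fill the remaining slack with the segment's pad byte. Returns the VA
 * of the segment content.
 */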
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz <= PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

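/*
 * Map the "boot" region (the rest of the loaded image beyond the segments,
 * followed by the ISA I/O memory window) at a random VA, and register it in
 * bootspace.
 */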
static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * fewer pages than that are actually entered.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There is a variable number of independent regions: one head, several
 * kernel segments, and one boot region. They are all mapped at random VAs.
 *
 * The head contains the ELF Header and the ELF Section Headers, which we
 * use to map the rest of the regions. The head must therefore be placed in
 * memory *before* the other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}