/*	$NetBSD: mm.c,v 1.9 2017/11/09 15:24:39 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

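/*
 * Bytes used to pad each segment up to a page boundary. Text is padded
 * with 0xCC (the x86 'int3' opcode), so that a stray jump into the
 * padding traps rather than executing garbage; rodata and data are
 * simply zero-filled.
 */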
#define PAD_TEXT	0xCC
#define PAD_RODATA	0x00
#define PAD_DATA	0x00

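/*
 * PTE protection bits for each MM_PROT_* value. Execute implies
 * read-only; there is deliberately no writable+executable combination.
 */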
static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

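/*
 * Bump allocator for physical pages: pa_avail is the next free physical
 * address, advanced by mm_palloc(). tmpva is a fixed scratch virtual
 * page, used as a temporary window to zero freshly allocated pages.
 */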
paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

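/*
 * Initialize the physical page allocator: all physical memory starting
 * at first_pa is considered free for mm_palloc() to hand out.
 */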
void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

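/*
 * Enter a single 4KB mapping of pa at va, with the given protection,
 * by writing the PTE directly through the recursive slot (PTE_BASE).
 * The TLB is not flushed here; callers do that when needed.
 */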
static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

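/* Flush the TLB entry for va. */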
static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" :: "r" (va) : "memory");
}

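/*
 * Allocate npages physically contiguous pages from the bump allocator
 * and zero them. Since the pages are not yet mapped, each one is
 * temporarily entered at tmpva to be memset to zero.
 */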
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

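/* Return the physical address of the 4KB page backing va. */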
paddr_t
mm_vatopa(vaddr_t va)
{
	return (PTE_BASE[pl1_i(va)] & PG_FRAME);
}

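/*
 * Change the protection of an already-mapped, page-aligned range by
 * re-entering each page with the new protection and flushing its TLB
 * entry.
 */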
static void
mm_mprotect(vaddr_t startva, size_t size, int prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_enter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	/*
	 * Remap the kernel segments with proper permissions.
	 */
	mm_mprotect(bootspace.text.va, bootspace.text.sz,
	    MM_PROT_READ|MM_PROT_EXECUTE);
	mm_mprotect(bootspace.rodata.va, bootspace.rodata.sz,
	    MM_PROT_READ);

	print_state(true, "Segments protection updated");
}

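/*
 * Number of page-tree entries of size pgsz needed to cover the range
 * [startva, endva). For example, a 3MB range starting 1.5MB into a 2MB
 * (NBPD_L2) superpage spans three L2 entries, even though 3MB rounds up
 * to only two 2MB units.
 */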
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

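/*
 * Build the L4/L3/L2 levels of the page tree covering [startva, endva),
 * allocating zeroed pages for the entries that do not exist yet. After
 * this, 4KB PTEs for the range can be entered directly via PTE_BASE.
 */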
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/*
	 * Build L4.
	 */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/*
	 * Build L3.
	 */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/*
	 * Build L2.
	 */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

static uint64_t
mm_rand_num64(void)
{
	/* XXX: yes, this is ridiculous, will be fixed soon */
	return rdtsc();
}

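/*
 * Map the "head" region (ELF Ehdr and section headers) at a random VA
 * inside the head window, and build the ELF info from it.
 */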
static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we look at the read-only mapping
	 * of the kernel we created in locore. We're identity mapped, so
	 * kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	rnd = mm_rand_num64();
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

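/*
 * Pick a page-aligned random VA of the given size inside the KASLR
 * window, making sure it does not overlap a previously chosen region,
 * then build the page tree for it. At most four regions (text, rodata,
 * data, boot) are tracked.
 */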
static vaddr_t
mm_randva_kregion(size_t size)
{
	static struct {
		vaddr_t sva;
		vaddr_t eva;
	} regions[4];
	static size_t idx = 0;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	ASSERT(idx < 4);

	while (1) {
		rnd = mm_rand_num64();
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), PAGE_SIZE);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < idx; i++) {
			if ((regions[i].sva <= randva) &&
			    (randva < regions[i].eva)) {
				ok = false;
				break;
			}
			if ((regions[i].sva < randva + size) &&
			    (randva + size <= regions[i].eva)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	regions[idx].sva = randva;
	regions[idx].eva = randva + size;
	idx++;

	mm_map_tree(randva, randva + size);

	return randva;
}

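/*
 * Map the kernel text, rodata and data segments, each at its own random
 * VA. Everything is entered read-write for now; the final permissions
 * are applied later by mm_bootspace_mprotect().
 */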
static void
mm_map_segments(void)
{
	size_t i, npages, size, elfsz;
	vaddr_t randva;
	paddr_t pa;

	/*
	 * Kernel text segment.
	 */
	elf_get_text(&pa, &elfsz);
	size = roundup(elfsz, PAGE_SIZE);
	randva = mm_randva_kregion(size);
	npages = size / PAGE_SIZE;

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_text(randva, pa);

	/* Fill in the padding */
	memset((void *)(randva + elfsz), PAD_TEXT, size - elfsz);

	/* Register the values in bootspace */
	bootspace.text.va = randva;
	bootspace.text.pa = pa;
	bootspace.text.sz = size;

	/*
	 * Kernel rodata segment.
	 */
	elf_get_rodata(&pa, &elfsz);
	size = roundup(elfsz, PAGE_SIZE);
	randva = mm_randva_kregion(size);
	npages = size / PAGE_SIZE;

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_rodata(randva, pa);

	/* Fill in the padding */
	memset((void *)(randva + elfsz), PAD_RODATA, size - elfsz);

	/* Register the values in bootspace */
	bootspace.rodata.va = randva;
	bootspace.rodata.pa = pa;
	bootspace.rodata.sz = size;

	/*
	 * Kernel data segment.
	 */
	elf_get_data(&pa, &elfsz);
	size = roundup(elfsz, PAGE_SIZE);
	randva = mm_randva_kregion(size);
	npages = size / PAGE_SIZE;

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_data(randva, pa);

	/* Fill in the padding */
	memset((void *)(randva + elfsz), PAD_DATA, size - elfsz);

	/* Register the values in bootspace */
	bootspace.data.va = randva;
	bootspace.data.pa = pa;
	bootspace.data.sz = size;
}

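/*
 * Map the "boot" region: everything from the end of the data segment up
 * to the last physical page handed out by mm_palloc(), followed by a
 * mapping of the ISA I/O memory window.
 */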
static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size);

	/* Enter the area and build the ELF info */
	bootpa = bootspace.data.pa + bootspace.data.sz;
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There are five independent regions: head, text, rodata, data, boot. They are
 * all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions. Head must be placed in memory *before* the
 * other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	mm_map_segments();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}