/*	$NetBSD: mm.c,v 1.20 2017/11/26 14:29:48 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define PAD_TEXT	0xCC
#define PAD_RODATA	0x00
#define PAD_DATA	0x00

#define ELFROUND	64

static const uint8_t pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = PAD_TEXT,
	[BTSEG_RODATA] = PAD_RODATA,
	[BTSEG_DATA] = PAD_DATA
};

#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

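/*
 * Physical allocation cursor (next free PA), and a fixed temporary VA used
 * to zero out freshly allocated pages.
 */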
paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

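/* Initialize the physical allocator: start handing out pages at first_pa. */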
void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

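/* Map PA at VA with the given protection; fatal if VA is already mapped. */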
static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	if (PTE_BASE[pl1_i(va)] & PG_V) {
		fatal("mm_enter_pa: mapping already present");
	}
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

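/* Same as mm_enter_pa, but silently overwrite any existing mapping at VA. */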
static void
mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

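/* Invalidate the TLB entry for VA. */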
static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

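/*
 * Allocate 'npages' contiguous physical pages from the pa_avail cursor, and
 * zero them out through the tmpva window. Returns the first PA.
 */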
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_reenter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

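/*
 * Change the protection of an already-mapped, page-aligned VA range, by
 * re-entering each page with the new protection and flushing its TLB entry.
 */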
static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_reenter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}

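/*
 * Number of page table entries of size 'pgsz' needed to cover the range
 * [startva, endva).
 */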
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

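/*
 * Populate the L4/L3/L2 levels of the page tree for [startva, endva),
 * allocating zeroed pages for the entries that are not yet valid.
 */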
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/* Build L4. */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/* Build L3. */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/* Build L2. */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

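/*
 * Pick a random VA in the KASLR window for a mapping of 'size' bytes,
 * retrying on collision with an already-registered segment, and build the
 * page tree for the chosen range.
 */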
static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		prng_get_rand(&rnd, sizeof(rnd));
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			if (randva < sva && eva < (randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

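/* Return the highest physical address used by the registered segments. */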
static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

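/* Register a segment in the first free slot of the bootspace structure. */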
static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}

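/*
 * Shift the segment content forward by a random offset inside its
 * page-rounded allocation, preserving the ELF alignment constraint.
 * Returns the chosen offset.
 */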
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	ASSERT(pagesz >= elfalign);
	ASSERT(pagesz % elfalign == 0);
	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	prng_get_rand(&rnd, sizeof(rnd));
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}

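/*
 * Map the "head" region (ELF Header and Section Headers) at a random VA in
 * the head window, and register it in bootspace.
 */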
static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we look at the read-only mapping of
	 * the kernel we created in locore. We're identity mapped, so
	 * kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	prng_get_rand(&rnd, sizeof(rnd));
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

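/*
 * Map a kernel segment (text, rodata or data) at a random VA, shift its
 * content by a random amount, and fill the surrounding space with the
 * segment's pad byte. Returns the VA of the segment content.
 */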
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz <= PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There is a variable number of independent regions: one head, several kernel
 * segments, one boot. They are all mapped at random VAs.
 *
 * The head contains the ELF Header and ELF Section Headers, and we use them
 * to map the rest of the regions. The head must be placed in memory *before*
 * the other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}