/*	$NetBSD: mm.c,v 1.22 2018/06/20 11:49:37 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define ELFROUND	64

static const uint8_t pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = 0xCC,
	[BTSEG_RODATA] = 0x00,
	[BTSEG_DATA] = 0x00
};

#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

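/*
 * Record the first free physical address. mm_palloc hands out pages starting
 * from there.
 */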
void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

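/*
 * Enter a VA->PA translation in the page tables, with the given protection.
 * Fails if a mapping is already present at this VA.
 */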
static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	if (PTE_BASE[pl1_i(va)] & PG_V) {
		fatal("mm_enter_pa: mapping already present");
	}
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

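/*
 * Same as mm_enter_pa, but overwrites any mapping already present at this VA.
 */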
static void
mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

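/* Flush the TLB entry for the given VA. */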
static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

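/*
 * Allocate 'npages' contiguous physical pages from pa_avail, and zero them
 * out through the temporary VA (tmpva).
 */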
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_reenter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

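/* True if the PTE has the Valid bit set. */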
static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

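/*
 * Change the protection of an already mapped, page-aligned range, flushing
 * each VA from the TLB.
 */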
static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_reenter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}

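/*
 * Number of page tree entries of size 'pgsz' needed to cover the
 * [startva, endva] range.
 */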
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

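/*
 * Allocate any missing intermediate levels (L4/L3/L2) of the page tree for
 * the given VA range. The leaf (L1) PTEs are entered later, via mm_enter_pa.
 */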
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/* Build L4. */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/* Build L3. */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/* Build L2. */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

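/*
 * Pick a random VA in the KASLR window for a region of 'size' bytes, making
 * sure it does not overlap an already registered bootspace segment, and
 * build the page tree for it.
 */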
static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		prng_get_rand(&rnd, sizeof(rnd));
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			if (randva < sva && eva < (randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

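/*
 * Return the highest physical address covered by the registered bootspace
 * segments.
 */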
static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

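/* Register a segment in the first free bootspace slot, fatal if all are taken. */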
static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}

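/*
 * Shift the segment contents by a random, ELF-aligned offset within the slack
 * left by the page rounding, so that the segment does not necessarily start
 * at a page boundary. Returns the chosen offset.
 */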
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	ASSERT(pagesz >= elfalign);
	ASSERT(pagesz % elfalign == 0);
	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	prng_get_rand(&rnd, sizeof(rnd));
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}

static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we take a look at the read-only
	 * mapping of the kernel we created in locore. We're identity mapped,
	 * so kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	prng_get_rand(&rnd, sizeof(rnd));
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

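/*
 * Map a kernel segment (text, rodata or data) at a random VA: enter its
 * physical pages, apply the random sub-page shift, fill the unused space
 * with the segment's pad byte, and register the segment in bootspace.
 * Returns the final (shifted) VA of the segment.
 */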
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz <= PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.smodule = (vaddr_t)iom_base + IOM_SIZE;
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There is a variable number of independent regions: one head, several kernel
 * segments, one boot. They are all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions. Head must be placed in memory *before* the
 * other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}