/*	$NetBSD: mm.c,v 1.17 2017/11/15 20:45:16 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define PAD_TEXT	0xCC
#define PAD_RODATA	0x00
#define PAD_DATA	0x00

#define ELFROUND	64

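/*
 * Byte used to fill the slack around each segment type.  Text is padded
 * with 0xCC (the x86 'int3' opcode) so that a stray jump into the padding
 * traps immediately; the other segments are zero-filled.
 */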
static const int pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = PAD_TEXT,
	[BTSEG_RODATA] = PAD_RODATA,
	[BTSEG_DATA] = PAD_DATA
};

#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

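/*
 * Translation from an MM_PROT_* value to the corresponding PTE bits.  The
 * values are used as array indices, so MM_PROT_READ|MM_PROT_WRITE selects
 * the same entry as MM_PROT_WRITE: write access implies read access, and
 * there is no write+execute entry.
 */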
static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

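/*
 * Allocate 'npages' consecutive physical pages from the bump allocator
 * (pa_avail) and zero them.  Each page is mapped at the temporary VA
 * 'tmpva' just long enough to be cleared.
 */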
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

paddr_t
mm_vatopa(vaddr_t va)
{
	return (PTE_BASE[pl1_i(va)] & PG_FRAME);
}

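/*
 * Change the protection of an already-mapped, page-aligned range: re-enter
 * each page with the new protection bits and flush its TLB entry.
 */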
static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_enter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}

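/*
 * Return the number of pgsz-sized page-table slots needed to cover the
 * range [startva, endva).  For example, with pgsz = NBPD_L2 (2MB), a range
 * from 0x201000 to 0x601000 touches the 2MB slots at 0x200000, 0x400000 and
 * 0x600000, so three L2 entries are needed.
 */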
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

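/*
 * Ensure that the L4, L3 and L2 page-table pages covering [startva, endva)
 * exist, allocating and installing zeroed pages where entries are missing.
 * The L1 entries themselves are filled later, by mm_enter_pa().
 */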
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/*
	 * Build L4.
	 */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/*
	 * Build L3.
	 */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/*
	 * Build L2.
	 */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

static uint64_t
mm_rand_num64(void)
{
	/* XXX: yes, this is ridiculous, will be fixed soon */
	return rdtsc();
}

static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we look at the read-only mapping of
	 * the kernel created in locore.  We are identity mapped, so
	 * kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	rnd = mm_rand_num64();
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

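/*
 * Pick a random, pagesz-aligned VA of 'size' bytes inside the KASLR window,
 * retrying until it does not overlap any segment already registered in
 * bootspace, then build the page tree for it.
 */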
static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		rnd = mm_rand_num64();
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			/* The candidate fully contains the segment. */
			if ((randva <= sva) && (eva <= randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

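/*
 * Return the highest physical address reached by the segments registered so
 * far; mm_map_boot() uses it as the start of the "boot" region.
 */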
static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}

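/*
 * Shift the ELF data of a segment by a random amount inside its mapping.
 * The ELF data needs roundup(elfsz, elfalign) bytes, the mapping provides
 * roundup(elfsz, pagesz) bytes, and the difference is slack that can absorb
 * a random, elfalign-aligned offset.  Returns the offset applied.
 */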
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	ASSERT(pagesz >= elfalign);
	ASSERT(pagesz % elfalign == 0);
	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	rnd = mm_rand_num64();
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}

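/*
 * Map a kernel segment (text, rodata or data) at a random VA: pick the page
 * granularity (4KB if the segment fits in one page, 2MB otherwise), map the
 * physical range, randomly shift the ELF data inside the mapping, fill the
 * surrounding slack with the segment's pad byte, and register the segment
 * in bootspace.  Returns the VA of the ELF data.
 */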
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz <= PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree is created with a
	 * fixed size, but fewer pages than that are actually entered.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There are five independent regions: head, text, rodata, data, boot.  They
 * are all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions.  Head must be placed in memory *before* the
 * other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}