/*	$NetBSD: mm.c,v 1.14 2017/11/15 18:02:36 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define PAD_TEXT	0xCC
#define PAD_RODATA	0x00
#define PAD_DATA	0x00

#define ELFROUND	64

static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

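/*
 * Record the first free physical address. mm_palloc() bump-allocates
 * physical pages starting from it.
 */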
void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

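/*
 * Enter a 4KB mapping of pa at va with the given protection, by writing
 * the L1 entry for va through PTE_BASE.
 */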
static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

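/* Flush the TLB entry for va. */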
static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" :: "r" (va) : "memory");
}

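/*
 * Allocate npages contiguous physical pages from the bump allocator and
 * zero them out, using tmpva as a temporary mapping window.
 */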
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

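/* Return the physical address backing va. The mapping is assumed to exist. */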
paddr_t
mm_vatopa(vaddr_t va)
{
	return (PTE_BASE[pl1_i(va)] & PG_FRAME);
}

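/*
 * Change the protection of an already mapped, page-aligned VA range by
 * re-entering each page with the new flags and flushing its TLB entry.
 */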
static void
mm_mprotect(vaddr_t startva, size_t size, int prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_enter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	int prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}

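/*
 * Return the number of page-table entries of size pgsz needed to cover the
 * range [startva, endva).
 */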
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

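/*
 * Allocate and install the L4/L3/L2 pages needed to map [startva, endva).
 * The pages hanging off the L2 entries are the L1 page tables that
 * mm_enter_pa() later fills.
 */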
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/*
	 * Build L4.
	 */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/*
	 * Build L3.
	 */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/*
	 * Build L2.
	 */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

static uint64_t
mm_rand_num64(void)
{
	/* XXX: yes, this is ridiculous, will be fixed soon */
	return rdtsc();
}

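/*
 * Map the "head" region (ELF Header and Section Headers) at a random VA
 * inside the head window, and build the in-memory ELF info from it.
 */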
static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we take a look at the read-only
	 * mapping of the kernel we created in locore. We're identity mapped,
	 * so kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	rnd = mm_rand_num64();
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

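/*
 * Pick a random VA in the KASLR window for a region of the given size and
 * alignment, retrying until it does not collide with any already registered
 * segment, then build its page tree.
 */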
static vaddr_t
mm_randva_kregion(size_t size, size_t align)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		rnd = mm_rand_num64();
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), align);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			/* Catch a segment fully inside the candidate range */
			if ((randva <= sva) && (eva <= randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

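/* Return the highest physical address used by the registered segments. */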
static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

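/* Record a segment in the first free slot of bootspace.segs[]. */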
static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}

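/*
 * Shift the segment by a random, ELF-aligned offset inside the page-rounded
 * area it was mapped in, to randomize its sub-page placement. Returns the
 * chosen offset.
 */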
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	rnd = mm_rand_num64();
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}

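/*
 * Map an ELF segment of size elfsz at a random VA, randomize its offset
 * within that area, fill the unused bytes with the segment-specific pad
 * pattern, and register the area in bootspace. Returns the final VA of
 * the segment.
 */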
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz < PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	if (segtype == BTSEG_TEXT) {
		pad = PAD_TEXT;
	} else if (segtype == BTSEG_RODATA) {
		pad = PAD_RODATA;
	} else {
		pad = PAD_DATA;
	}
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

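/*
 * Map the "boot" region: everything located between the end of the last
 * registered segment and pa_avail, followed by the ISA I/O memory window.
 */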
static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There are five independent regions: head, text, rodata, data, boot. They are
 * all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions. Head must be placed in memory *before* the
 * other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}