mm.c revision 1.17 1 1.17 maxv /* $NetBSD: mm.c,v 1.17 2017/11/15 20:45:16 maxv Exp $ */
2 1.1 maxv
3 1.1 maxv /*
4 1.1 maxv * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
5 1.1 maxv *
6 1.1 maxv * This code is derived from software contributed to The NetBSD Foundation
7 1.1 maxv * by Maxime Villard.
8 1.1 maxv *
9 1.1 maxv * Redistribution and use in source and binary forms, with or without
10 1.1 maxv * modification, are permitted provided that the following conditions
11 1.1 maxv * are met:
12 1.1 maxv * 1. Redistributions of source code must retain the above copyright
13 1.1 maxv * notice, this list of conditions and the following disclaimer.
14 1.1 maxv * 2. Redistributions in binary form must reproduce the above copyright
15 1.1 maxv * notice, this list of conditions and the following disclaimer in the
16 1.1 maxv * documentation and/or other materials provided with the distribution.
17 1.1 maxv *
18 1.1 maxv * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 1.1 maxv * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 1.1 maxv * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 1.1 maxv * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 1.1 maxv * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 1.1 maxv * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 1.1 maxv * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 1.1 maxv * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 1.1 maxv * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 1.1 maxv * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 1.1 maxv * POSSIBILITY OF SUCH DAMAGE.
29 1.1 maxv */
30 1.1 maxv
31 1.1 maxv #include "prekern.h"
32 1.1 maxv
33 1.9 maxv #define PAD_TEXT 0xCC
34 1.9 maxv #define PAD_RODATA 0x00
35 1.9 maxv #define PAD_DATA 0x00
36 1.9 maxv
37 1.14 maxv #define ELFROUND 64
38 1.14 maxv
39 1.17 maxv static const int pads[4] = {
40 1.17 maxv [BTSEG_NONE] = 0x00,
41 1.17 maxv [BTSEG_TEXT] = 0xCC,
42 1.17 maxv [BTSEG_RODATA] = 0x00,
43 1.17 maxv [BTSEG_DATA] = 0x00
44 1.17 maxv };
45 1.17 maxv
46 1.15 maxv #define MM_PROT_READ 0x00
47 1.15 maxv #define MM_PROT_WRITE 0x01
48 1.15 maxv #define MM_PROT_EXECUTE 0x02
49 1.15 maxv
50 1.1 maxv static const pt_entry_t protection_codes[3] = {
51 1.1 maxv [MM_PROT_READ] = PG_RO | PG_NX,
52 1.1 maxv [MM_PROT_WRITE] = PG_RW | PG_NX,
53 1.1 maxv [MM_PROT_EXECUTE] = PG_RO,
54 1.1 maxv /* RWX does not exist */
55 1.1 maxv };
56 1.1 maxv
57 1.6 maxv struct bootspace bootspace;
58 1.6 maxv
59 1.1 maxv extern paddr_t kernpa_start, kernpa_end;
60 1.1 maxv vaddr_t iom_base;
61 1.1 maxv
62 1.1 maxv paddr_t pa_avail = 0;
63 1.2 maxv static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);
64 1.1 maxv
65 1.1 maxv void
66 1.1 maxv mm_init(paddr_t first_pa)
67 1.1 maxv {
68 1.1 maxv pa_avail = first_pa;
69 1.1 maxv }
70 1.1 maxv
71 1.1 maxv static void
72 1.1 maxv mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
73 1.1 maxv {
74 1.1 maxv PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
75 1.1 maxv }
76 1.1 maxv
77 1.1 maxv static void
78 1.1 maxv mm_flush_va(vaddr_t va)
79 1.1 maxv {
80 1.1 maxv asm volatile("invlpg (%0)" ::"r" (va) : "memory");
81 1.1 maxv }
82 1.1 maxv
83 1.2 maxv static paddr_t
84 1.2 maxv mm_palloc(size_t npages)
85 1.2 maxv {
86 1.2 maxv paddr_t pa;
87 1.2 maxv size_t i;
88 1.2 maxv
89 1.2 maxv /* Allocate the physical pages */
90 1.2 maxv pa = pa_avail;
91 1.2 maxv pa_avail += npages * PAGE_SIZE;
92 1.2 maxv
93 1.2 maxv /* Zero them out */
94 1.2 maxv for (i = 0; i < npages; i++) {
95 1.2 maxv mm_enter_pa(pa + i * PAGE_SIZE, tmpva,
96 1.2 maxv MM_PROT_READ|MM_PROT_WRITE);
97 1.2 maxv mm_flush_va(tmpva);
98 1.2 maxv memset((void *)tmpva, 0, PAGE_SIZE);
99 1.2 maxv }
100 1.2 maxv
101 1.2 maxv return pa;
102 1.2 maxv }
103 1.2 maxv
104 1.3 maxv static bool
105 1.3 maxv mm_pte_is_valid(pt_entry_t pte)
106 1.3 maxv {
107 1.3 maxv return ((pte & PG_V) != 0);
108 1.3 maxv }
109 1.3 maxv
110 1.1 maxv paddr_t
111 1.1 maxv mm_vatopa(vaddr_t va)
112 1.1 maxv {
113 1.1 maxv return (PTE_BASE[pl1_i(va)] & PG_FRAME);
114 1.1 maxv }
115 1.1 maxv
116 1.8 maxv static void
117 1.17 maxv mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
118 1.1 maxv {
119 1.1 maxv size_t i, npages;
120 1.1 maxv vaddr_t va;
121 1.1 maxv paddr_t pa;
122 1.1 maxv
123 1.1 maxv ASSERT(size % PAGE_SIZE == 0);
124 1.1 maxv npages = size / PAGE_SIZE;
125 1.1 maxv
126 1.1 maxv for (i = 0; i < npages; i++) {
127 1.1 maxv va = startva + i * PAGE_SIZE;
128 1.1 maxv pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
129 1.1 maxv mm_enter_pa(pa, va, prot);
130 1.1 maxv mm_flush_va(va);
131 1.1 maxv }
132 1.1 maxv }
133 1.1 maxv
134 1.8 maxv void
135 1.13 maxv mm_bootspace_mprotect(void)
136 1.8 maxv {
137 1.17 maxv pte_prot_t prot;
138 1.10 maxv size_t i;
139 1.10 maxv
140 1.10 maxv /* Remap the kernel segments with proper permissions. */
141 1.10 maxv for (i = 0; i < BTSPACE_NSEGS; i++) {
142 1.10 maxv if (bootspace.segs[i].type == BTSEG_TEXT) {
143 1.10 maxv prot = MM_PROT_READ|MM_PROT_EXECUTE;
144 1.10 maxv } else if (bootspace.segs[i].type == BTSEG_RODATA) {
145 1.10 maxv prot = MM_PROT_READ;
146 1.10 maxv } else {
147 1.10 maxv continue;
148 1.10 maxv }
149 1.10 maxv mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
150 1.10 maxv }
151 1.8 maxv
152 1.8 maxv print_state(true, "Segments protection updated");
153 1.8 maxv }
154 1.8 maxv
155 1.5 maxv static size_t
156 1.5 maxv mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
157 1.5 maxv {
158 1.5 maxv size_t npages;
159 1.5 maxv
160 1.5 maxv npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
161 1.5 maxv rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
162 1.5 maxv return (npages / (pgsz / PAGE_SIZE));
163 1.5 maxv }
164 1.5 maxv
165 1.1 maxv static void
166 1.2 maxv mm_map_tree(vaddr_t startva, vaddr_t endva)
167 1.1 maxv {
168 1.5 maxv size_t i, nL4e, nL3e, nL2e;
169 1.1 maxv size_t L4e_idx, L3e_idx, L2e_idx;
170 1.3 maxv paddr_t pa;
171 1.3 maxv
172 1.1 maxv /*
173 1.3 maxv * Build L4.
174 1.1 maxv */
175 1.3 maxv L4e_idx = pl4_i(startva);
176 1.5 maxv nL4e = mm_nentries_range(startva, endva, NBPD_L4);
177 1.3 maxv ASSERT(L4e_idx == 511);
178 1.2 maxv ASSERT(nL4e == 1);
179 1.3 maxv if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
180 1.3 maxv pa = mm_palloc(1);
181 1.3 maxv L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
182 1.3 maxv }
183 1.1 maxv
184 1.1 maxv /*
185 1.3 maxv * Build L3.
186 1.1 maxv */
187 1.3 maxv L3e_idx = pl3_i(startva);
188 1.5 maxv nL3e = mm_nentries_range(startva, endva, NBPD_L3);
189 1.3 maxv for (i = 0; i < nL3e; i++) {
190 1.3 maxv if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
191 1.3 maxv continue;
192 1.3 maxv }
193 1.3 maxv pa = mm_palloc(1);
194 1.3 maxv L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
195 1.3 maxv }
196 1.1 maxv
197 1.1 maxv /*
198 1.3 maxv * Build L2.
199 1.1 maxv */
200 1.3 maxv L2e_idx = pl2_i(startva);
201 1.5 maxv nL2e = mm_nentries_range(startva, endva, NBPD_L2);
202 1.2 maxv for (i = 0; i < nL2e; i++) {
203 1.3 maxv if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
204 1.3 maxv continue;
205 1.3 maxv }
206 1.3 maxv pa = mm_palloc(1);
207 1.3 maxv L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
208 1.1 maxv }
209 1.1 maxv }
210 1.1 maxv
/*
 * Return a 64-bit value used as the randomness source for address
 * selection. It is currently just whatever rdtsc() returns.
 */
static uint64_t
mm_rand_num64(void)
{
	uint64_t val;

	/* XXX: yes, this is ridiculous, will be fixed soon */
	val = rdtsc();
	return val;
}
217 1.6 maxv
218 1.6 maxv static void
219 1.13 maxv mm_map_head(void)
220 1.6 maxv {
221 1.6 maxv size_t i, npages, size;
222 1.6 maxv uint64_t rnd;
223 1.6 maxv vaddr_t randva;
224 1.6 maxv
225 1.6 maxv /*
226 1.6 maxv * To get the size of the head, we give a look at the read-only
227 1.6 maxv * mapping of the kernel we created in locore. We're identity mapped,
228 1.6 maxv * so kernpa = kernva.
229 1.6 maxv */
230 1.6 maxv size = elf_get_head_size((vaddr_t)kernpa_start);
231 1.6 maxv npages = size / PAGE_SIZE;
232 1.6 maxv
233 1.6 maxv rnd = mm_rand_num64();
234 1.6 maxv randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
235 1.6 maxv PAGE_SIZE);
236 1.6 maxv mm_map_tree(randva, randva + size);
237 1.6 maxv
238 1.6 maxv /* Enter the area and build the ELF info */
239 1.6 maxv for (i = 0; i < npages; i++) {
240 1.6 maxv mm_enter_pa(kernpa_start + i * PAGE_SIZE,
241 1.6 maxv randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
242 1.6 maxv }
243 1.6 maxv elf_build_head(randva);
244 1.6 maxv
245 1.6 maxv /* Register the values in bootspace */
246 1.6 maxv bootspace.head.va = randva;
247 1.6 maxv bootspace.head.pa = kernpa_start;
248 1.6 maxv bootspace.head.sz = size;
249 1.6 maxv }
250 1.6 maxv
251 1.1 maxv static vaddr_t
252 1.17 maxv mm_randva_kregion(size_t size, size_t pagesz)
253 1.1 maxv {
254 1.11 maxv vaddr_t sva, eva;
255 1.1 maxv vaddr_t randva;
256 1.1 maxv uint64_t rnd;
257 1.6 maxv size_t i;
258 1.6 maxv bool ok;
259 1.6 maxv
260 1.6 maxv while (1) {
261 1.6 maxv rnd = mm_rand_num64();
262 1.6 maxv randva = rounddown(KASLR_WINDOW_BASE +
263 1.17 maxv rnd % (KASLR_WINDOW_SIZE - size), pagesz);
264 1.6 maxv
265 1.6 maxv /* Detect collisions */
266 1.6 maxv ok = true;
267 1.11 maxv for (i = 0; i < BTSPACE_NSEGS; i++) {
268 1.11 maxv if (bootspace.segs[i].type == BTSEG_NONE) {
269 1.11 maxv continue;
270 1.11 maxv }
271 1.11 maxv sva = bootspace.segs[i].va;
272 1.11 maxv eva = sva + bootspace.segs[i].sz;
273 1.11 maxv
274 1.11 maxv if ((sva <= randva) && (randva < eva)) {
275 1.6 maxv ok = false;
276 1.6 maxv break;
277 1.6 maxv }
278 1.11 maxv if ((sva < randva + size) && (randva + size <= eva)) {
279 1.6 maxv ok = false;
280 1.6 maxv break;
281 1.6 maxv }
282 1.6 maxv }
283 1.6 maxv if (ok) {
284 1.6 maxv break;
285 1.6 maxv }
286 1.6 maxv }
287 1.1 maxv
288 1.2 maxv mm_map_tree(randva, randva + size);
289 1.1 maxv
290 1.1 maxv return randva;
291 1.1 maxv }
292 1.1 maxv
293 1.10 maxv static paddr_t
294 1.13 maxv bootspace_getend(void)
295 1.10 maxv {
296 1.10 maxv paddr_t pa, max = 0;
297 1.10 maxv size_t i;
298 1.10 maxv
299 1.10 maxv for (i = 0; i < BTSPACE_NSEGS; i++) {
300 1.10 maxv if (bootspace.segs[i].type == BTSEG_NONE) {
301 1.10 maxv continue;
302 1.10 maxv }
303 1.10 maxv pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
304 1.10 maxv if (pa > max)
305 1.10 maxv max = pa;
306 1.10 maxv }
307 1.10 maxv
308 1.10 maxv return max;
309 1.10 maxv }
310 1.10 maxv
311 1.10 maxv static void
312 1.10 maxv bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
313 1.10 maxv {
314 1.10 maxv size_t i;
315 1.10 maxv
316 1.10 maxv for (i = 0; i < BTSPACE_NSEGS; i++) {
317 1.10 maxv if (bootspace.segs[i].type == BTSEG_NONE) {
318 1.10 maxv bootspace.segs[i].type = type;
319 1.10 maxv bootspace.segs[i].va = va;
320 1.10 maxv bootspace.segs[i].pa = pa;
321 1.10 maxv bootspace.segs[i].sz = sz;
322 1.10 maxv return;
323 1.10 maxv }
324 1.10 maxv }
325 1.10 maxv
326 1.10 maxv fatal("bootspace_addseg: segments full");
327 1.10 maxv }
328 1.10 maxv
329 1.14 maxv static size_t
330 1.14 maxv mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
331 1.14 maxv {
332 1.14 maxv size_t shiftsize, offset;
333 1.14 maxv uint64_t rnd;
334 1.14 maxv
335 1.14 maxv if (elfalign == 0) {
336 1.14 maxv elfalign = ELFROUND;
337 1.14 maxv }
338 1.14 maxv
339 1.17 maxv ASSERT(pagesz >= elfalign);
340 1.17 maxv ASSERT(pagesz % elfalign == 0);
341 1.14 maxv shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
342 1.14 maxv if (shiftsize == 0) {
343 1.14 maxv return 0;
344 1.14 maxv }
345 1.14 maxv
346 1.14 maxv rnd = mm_rand_num64();
347 1.14 maxv offset = roundup(rnd % shiftsize, elfalign);
348 1.14 maxv ASSERT((va + offset) % elfalign == 0);
349 1.14 maxv
350 1.14 maxv memmove((void *)(va + offset), (void *)va, elfsz);
351 1.14 maxv
352 1.14 maxv return offset;
353 1.14 maxv }
354 1.14 maxv
355 1.12 maxv vaddr_t
356 1.14 maxv mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
357 1.1 maxv {
358 1.14 maxv size_t i, npages, size, pagesz, offset;
359 1.6 maxv vaddr_t randva;
360 1.12 maxv char pad;
361 1.6 maxv
362 1.16 maxv if (elfsz <= PAGE_SIZE) {
363 1.14 maxv pagesz = NBPD_L1;
364 1.14 maxv } else {
365 1.14 maxv pagesz = NBPD_L2;
366 1.14 maxv }
367 1.14 maxv
368 1.14 maxv size = roundup(elfsz, pagesz);
369 1.14 maxv randva = mm_randva_kregion(size, pagesz);
370 1.14 maxv
371 1.6 maxv npages = size / PAGE_SIZE;
372 1.6 maxv for (i = 0; i < npages; i++) {
373 1.6 maxv mm_enter_pa(pa + i * PAGE_SIZE,
374 1.6 maxv randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
375 1.6 maxv }
376 1.6 maxv
377 1.14 maxv offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
378 1.14 maxv ASSERT(offset + elfsz <= size);
379 1.14 maxv
380 1.17 maxv pad = pads[segtype];
381 1.14 maxv memset((void *)randva, pad, offset);
382 1.14 maxv memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);
383 1.6 maxv
384 1.12 maxv bootspace_addseg(segtype, randva, pa, size);
385 1.9 maxv
386 1.14 maxv return (randva + offset);
387 1.6 maxv }
388 1.6 maxv
389 1.6 maxv static void
390 1.13 maxv mm_map_boot(void)
391 1.6 maxv {
392 1.6 maxv size_t i, npages, size;
393 1.6 maxv vaddr_t randva;
394 1.6 maxv paddr_t bootpa;
395 1.6 maxv
396 1.6 maxv /*
397 1.6 maxv * The "boot" region is special: its page tree has a fixed size, but
398 1.6 maxv * the number of pages entered is lower.
399 1.6 maxv */
400 1.6 maxv
401 1.6 maxv /* Create the page tree */
402 1.6 maxv size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
403 1.14 maxv randva = mm_randva_kregion(size, PAGE_SIZE);
404 1.6 maxv
405 1.6 maxv /* Enter the area and build the ELF info */
406 1.10 maxv bootpa = bootspace_getend();
407 1.6 maxv size = (pa_avail - bootpa);
408 1.6 maxv npages = size / PAGE_SIZE;
409 1.6 maxv for (i = 0; i < npages; i++) {
410 1.6 maxv mm_enter_pa(bootpa + i * PAGE_SIZE,
411 1.6 maxv randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
412 1.1 maxv }
413 1.6 maxv elf_build_boot(randva, bootpa);
414 1.1 maxv
415 1.1 maxv /* Enter the ISA I/O MEM */
416 1.6 maxv iom_base = randva + npages * PAGE_SIZE;
417 1.1 maxv npages = IOM_SIZE / PAGE_SIZE;
418 1.1 maxv for (i = 0; i < npages; i++) {
419 1.1 maxv mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
420 1.1 maxv iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
421 1.1 maxv }
422 1.1 maxv
423 1.6 maxv /* Register the values in bootspace */
424 1.6 maxv bootspace.boot.va = randva;
425 1.6 maxv bootspace.boot.pa = bootpa;
426 1.6 maxv bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
427 1.6 maxv (size_t)bootspace.boot.va;
428 1.6 maxv
429 1.6 maxv /* Initialize the values that are located in the "boot" region */
430 1.6 maxv extern uint64_t PDPpaddr;
431 1.6 maxv bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
432 1.6 maxv bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
433 1.6 maxv bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
434 1.1 maxv }
435 1.6 maxv
436 1.6 maxv /*
437 1.6 maxv * There are five independent regions: head, text, rodata, data, boot. They are
438 1.6 maxv * all mapped at random VAs.
439 1.6 maxv *
440 1.6 maxv * Head contains the ELF Header and ELF Section Headers, and we use them to
441 1.6 maxv * map the rest of the regions. Head must be placed in memory *before* the
442 1.6 maxv * other regions.
443 1.6 maxv *
444 1.6 maxv * At the end of this function, the bootspace structure is fully constructed.
445 1.6 maxv */
446 1.6 maxv void
447 1.13 maxv mm_map_kernel(void)
448 1.6 maxv {
449 1.6 maxv memset(&bootspace, 0, sizeof(bootspace));
450 1.6 maxv mm_map_head();
451 1.7 maxv print_state(true, "Head region mapped");
452 1.12 maxv elf_map_sections();
453 1.7 maxv print_state(true, "Segments mapped");
454 1.6 maxv mm_map_boot();
455 1.7 maxv print_state(true, "Boot region mapped");
456 1.6 maxv }
457