/*	$NetBSD: mm.c,v 1.9 2017/11/09 15:24:39 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define PAD_TEXT	0xCC
#define PAD_RODATA	0x00
#define PAD_DATA	0x00

static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
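/*
 * Note: tmpva points right past the 2MB entries the bootstrap code created
 * for the image, so (assuming nothing else claims that slot) its L1 entry
 * is free, and it can serve as a scratch VA to temporarily map arbitrary
 * physical pages.
 */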
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

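/*
 * Enter pa at va with the given protection. PTE_BASE is the base of the
 * recursive page-table mapping, so PTE_BASE[pl1_i(va)] designates the L1
 * entry that translates va.
 */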
static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

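/* Invalidate the TLB entry of va, after its PTE was modified. */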
static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

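/*
 * Allocate npages physical pages by bumping pa_avail, and zero them out by
 * mapping each one in turn at the tmpva scratch page.
 */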
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

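/*
 * Translate va into a physical address via its L1 entry. The page offset
 * is not added back, so this is meant for page-aligned addresses.
 */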
paddr_t
mm_vatopa(vaddr_t va)
{
	return (PTE_BASE[pl1_i(va)] & PG_FRAME);
}

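/*
 * Change the protection of the [startva, startva + size) range: re-enter
 * each page at the same PA with the new protection, and flush its TLB
 * entry.
 */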
static void
mm_mprotect(vaddr_t startva, size_t size, int prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_enter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	/*
	 * Remap the kernel segments with proper permissions.
	 */
	mm_mprotect(bootspace.text.va, bootspace.text.sz,
	    MM_PROT_READ|MM_PROT_EXECUTE);
	mm_mprotect(bootspace.rodata.va, bootspace.rodata.sz,
	    MM_PROT_READ);

	print_state(true, "Segments protection updated");
}

static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

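/*
 * Example for mm_nentries_range above (illustrative numbers only): with
 * 4KB pages and pgsz = NBPD_L2 = 2MB (512 pages), the range [3MB, 5MB)
 * gives rounddown(768, 512) = 512 and roundup(1280, 512) = 1536, so
 * (1536 - 512) / 512 = 2 L2 entries are needed: the ones covering
 * [2MB, 4MB) and [4MB, 6MB).
 */

/*
 * Allocate and install the intermediate page-table pages (L4, L3, L2)
 * needed to map [startva, endva). The L1 entries themselves are filled
 * later, by mm_enter_pa. The ASSERTs encode the expectation that the
 * whole range lives in the last L4 slot.
 */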
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/*
	 * Build L4.
	 */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/*
	 * Build L3.
	 */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/*
	 * Build L2.
	 */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

static uint64_t
mm_rand_num64(void)
{
	/* XXX: yes, this is ridiculous, will be fixed soon */
	return rdtsc();
}
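/*
 * Illustrative sketch only, disabled: one way the seed above could be
 * strengthened is by mixing the TSC with the RDRAND instruction, assuming
 * the CPU supports it -- the CPUID feature check is omitted here.
 */
#if 0
static uint64_t
mm_rand_num64(void)
{
	uint64_t rnd;
	uint8_t ok;

	/* RDRAND sets CF on success; fall back to the TSC on failure */
	asm volatile("rdrand %0; setc %1" : "=r" (rnd), "=qm" (ok));
	if (!ok) {
		return rdtsc();
	}
	return rnd ^ rdtsc();
}
#endif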

static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we take a look at the read-only
	 * mapping of the kernel we created in locore. We're identity mapped,
	 * so kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	rnd = mm_rand_num64();
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

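/*
 * Pick a page-aligned random VA in the KASLR window for a region of the
 * given size, retrying until it does not collide with the regions chosen
 * so far; then record it and build its page tree.
 */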
static vaddr_t
mm_randva_kregion(size_t size)
{
	static struct {
		vaddr_t sva;
		vaddr_t eva;
	} regions[4];
	static size_t idx = 0;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	ASSERT(idx < 4);

	while (1) {
		rnd = mm_rand_num64();
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), PAGE_SIZE);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < idx; i++) {
			if ((regions[i].sva <= randva) &&
			    (randva < regions[i].eva)) {
				ok = false;
				break;
			}
			if ((regions[i].sva < randva + size) &&
			    (randva + size <= regions[i].eva)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	regions[idx].sva = randva;
	regions[idx].eva = randva + size;
	idx++;

	mm_map_tree(randva, randva + size);

	return randva;
}

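/*
 * Map the text, rodata and data segments at independent random VAs. Each
 * segment is entered read-write for now, since the prekern still has to
 * write into it (padding, relocations); the final permissions are applied
 * later, by mm_bootspace_mprotect().
 */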
static void
mm_map_segments(void)
{
	size_t i, npages, size, elfsz;
	vaddr_t randva;
	paddr_t pa;

	/*
	 * Kernel text segment.
	 */
	elf_get_text(&pa, &elfsz);
	size = roundup(elfsz, PAGE_SIZE);
	randva = mm_randva_kregion(size);
	npages = size / PAGE_SIZE;

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_text(randva, pa);

	/* Fill in the padding */
	memset((void *)(randva + elfsz), PAD_TEXT, size - elfsz);

	/* Register the values in bootspace */
	bootspace.text.va = randva;
	bootspace.text.pa = pa;
	bootspace.text.sz = size;

	/*
	 * Kernel rodata segment.
	 */
	elf_get_rodata(&pa, &elfsz);
	size = roundup(elfsz, PAGE_SIZE);
	randva = mm_randva_kregion(size);
	npages = size / PAGE_SIZE;

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_rodata(randva, pa);

	/* Fill in the padding */
	memset((void *)(randva + elfsz), PAD_RODATA, size - elfsz);

	/* Register the values in bootspace */
	bootspace.rodata.va = randva;
	bootspace.rodata.pa = pa;
	bootspace.rodata.sz = size;

	/*
	 * Kernel data segment.
	 */
	elf_get_data(&pa, &elfsz);
	size = roundup(elfsz, PAGE_SIZE);
	randva = mm_randva_kregion(size);
	npages = size / PAGE_SIZE;

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_data(randva, pa);

	/* Fill in the padding */
	memset((void *)(randva + elfsz), PAD_DATA, size - elfsz);

	/* Register the values in bootspace */
	bootspace.data.va = randva;
	bootspace.data.pa = pa;
	bootspace.data.sz = size;
}

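/*
 * Map the "boot" region: everything between the end of the data segment
 * and the last physical address we allocated, followed by the ISA I/O
 * memory window.
 */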
static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size);

	/* Enter the area and build the ELF info */
	bootpa = bootspace.data.pa + bootspace.data.sz;
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There are five independent regions: head, text, rodata, data, boot. They are
 * all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions. Head must be placed in memory *before* the
 * other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	mm_map_segments();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}