riscv_machdep.c revision 1.44 1 /* $NetBSD: riscv_machdep.c,v 1.44 2025/09/06 21:02:41 thorpej Exp $ */
2
3 /*-
4 * Copyright (c) 2014, 2019, 2022 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matt Thomas of 3am Software Foundry, and by Nick Hudson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "opt_ddb.h"
33 #include "opt_modular.h"
34 #include "opt_multiprocessor.h"
35 #include "opt_riscv_debug.h"
36
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: riscv_machdep.c,v 1.44 2025/09/06 21:02:41 thorpej Exp $");
39
40 #include <sys/param.h>
41
42 #include <sys/asan.h>
43 #include <sys/boot_flag.h>
44 #include <sys/cpu.h>
45 #include <sys/exec.h>
46 #include <sys/kmem.h>
47 #include <sys/ktrace.h>
48 #include <sys/lwp.h>
49 #include <sys/module.h>
50 #include <sys/mount.h>
51 #include <sys/msgbuf.h>
52 #include <sys/optstr.h>
53 #include <sys/proc.h>
54 #include <sys/reboot.h>
55 #include <sys/syscall.h>
56 #include <sys/sysctl.h>
57 #include <sys/systm.h>
58
59 #include <dev/cons.h>
60 #ifdef __HAVE_MM_MD_KERNACC
61 #include <dev/mm.h>
62 #endif
63
64 #include <uvm/uvm_extern.h>
65
66 #include <riscv/frame.h>
67 #include <riscv/locore.h>
68 #include <riscv/machdep.h>
69 #include <riscv/pte.h>
70 #include <riscv/sbi.h>
71 #include <riscv/userret.h>
72
73 #include <libfdt.h>
74 #include <dev/fdt/fdtvar.h>
75 #include <dev/fdt/fdt_boot.h>
76 #include <dev/fdt/fdt_memory.h>
77 #include <dev/fdt/fdt_private.h>
78 #include <dev/fdt/fdt_platform.h>
79
80 int cpu_printfataltraps = 1;
81 char machine[] = MACHINE;
82 char machine_arch[] = MACHINE_ARCH;
83
/*
 * VPRINTF() is printf() when VERBOSE_INIT_RISCV is defined and expands to
 * __nothing otherwise.
 */
#ifndef VERBOSE_INIT_RISCV
#define VPRINTF(...)	__nothing
#else
#define VPRINTF(...)	printf(__VA_ARGS__)
#endif
89
90 /* 64 should be enough, even for a ZFS UUID */
91 #define MAX_BOOT_DEV_STR 64
92
93 char bootdevstr[MAX_BOOT_DEV_STR] = "";
94 char *boot_args = NULL;
95
96 paddr_t physical_start;
97 paddr_t physical_end;
98
99 static void
100 earlyconsputc(dev_t dev, int c)
101 {
102 uartputc(c);
103 }
104
105 static int
106 earlyconsgetc(dev_t dev)
107 {
108 return uartgetc();
109 }
110
111 static struct consdev earlycons = {
112 .cn_putc = earlyconsputc,
113 .cn_getc = earlyconsgetc,
114 .cn_pollc = nullcnpollc,
115 };
116
117 struct vm_map *phys_map;
118
119 struct trapframe cpu_ddb_regs;
120 const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
121 #ifdef FPE
122 [PCU_FPU] = &pcu_fpu_ops,
123 #endif
124 };
125
/*
 * Used by PHYSTOV and VTOPHYS.  It will be set before the BSS is zeroed,
 * so it is explicitly placed in the .data section rather than the BSS.
 */
__attribute__((__section__(".data"))) unsigned long kern_vtopdiff;
131
132
133 /*
134 * machine dependent system variables.
135 */
136 SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
137 {
138 sysctl_createv(clog, 0, NULL, NULL,
139 CTLFLAG_PERMANENT,
140 CTLTYPE_NODE, "machdep", NULL,
141 NULL, 0, NULL, 0,
142 CTL_MACHDEP, CTL_EOL);
143 }
144
#if defined(MODULAR)
/*
 * Hook for pushing any modules loaded by the boot loader.
 * The body is currently empty, so nothing is pushed.
 */
void
module_init_md(void)
{
	/* nothing to do */
}
#endif /* MODULAR */
154
155 /*
156 * Set registers on exec.
157 * Clear all registers except sp, pc.
158 * sp is set to the stack pointer passed in. pc is set to the entry
159 * point given by the exec_package passed in.
160 */
161 void
162 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
163 {
164 struct trapframe * const tf = l->l_md.md_utf;
165 struct proc * const p = l->l_proc;
166
167 memset(tf, 0, sizeof(*tf));
168 tf->tf_sp = (intptr_t)stack_align(stack);
169 tf->tf_pc = (intptr_t)pack->ep_entry & ~1;
170 #ifdef _LP64
171 tf->tf_sr = (p->p_flag & PK_32) ? SR_USER32 : SR_USER64;
172 #else
173 tf->tf_sr = SR_USER;
174 #endif
175
176 // Set up arguments for ___start(cleanup, ps_strings)
177 tf->tf_a0 = 0; // cleanup
178 tf->tf_a1 = p->p_psstrp; // ps_strings
179
180 /*
181 * Must have interrupts disabled for exception return.
182 * Must be switching to user mode.
183 * Must enable interrupts after sret.
184 */
185 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
186 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
187 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
188 }
189
190 void
191 md_child_return(struct lwp *l)
192 {
193 struct trapframe * const tf = lwp_trapframe(l);
194
195 tf->tf_a0 = 0;
196 tf->tf_a1 = 1;
197 #ifdef FPE
198 /* Disable FP as we can't be using it (yet). */
199 tf->tf_sr &= ~SR_FS;
200 #endif
201
202 /*
203 * Must have interrupts disabled for exception return.
204 * Must be switching to user mode.
205 * Must enable interrupts after sret.
206 */
207
208 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
209 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
210 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
211
212 userret(l);
213 }
214
/*
 * Process the tail end of a posix_spawn() for the child.
 * All that is done here is the userret() processing for the lwp.
 */
void
cpu_spawn_return(struct lwp *l)
{

	userret(l);
}
223
224 /*
225 * Start a new LWP
226 */
227 void
228 startlwp(void *arg)
229 {
230 ucontext_t * const uc = arg;
231 lwp_t * const l = curlwp;
232 int error __diagused;
233
234 error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
235 KASSERT(error == 0);
236
237 kmem_free(uc, sizeof(*uc));
238 userret(l);
239 }
240
241 // We've worked hard to make sure struct reg and __gregset_t are the same.
242 // Ditto for struct fpreg and fregset_t.
243
244 #ifdef _LP64
245 CTASSERT(sizeof(struct reg) == sizeof(__gregset_t));
246 #endif
247 CTASSERT(sizeof(struct fpreg) == sizeof(__fregset_t));
248
249 void
250 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
251 {
252 const struct trapframe * const tf = l->l_md.md_utf;
253
254 /* Save register context. */
255 *(struct reg *)mcp->__gregs = tf->tf_regs;
256
257 *flags |= _UC_CPU | _UC_TLSBASE;
258
259 /* Save floating point register context, if any. */
260 KASSERT(l == curlwp);
261 if (fpu_valid_p(l)) {
262 /*
263 * If this process is the current FP owner, dump its
264 * context to the PCB first.
265 */
266 fpu_save(l);
267
268 struct pcb * const pcb = lwp_getpcb(l);
269 *(struct fpreg *)mcp->__fregs = pcb->pcb_fpregs;
270 *flags |= _UC_FPU;
271 }
272 }
273
274 int
275 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
276 {
277 /*
278 * Verify that at least the PC and SP are user addresses.
279 */
280 if ((intptr_t) mcp->__gregs[_REG_PC] < 0
281 || (intptr_t) mcp->__gregs[_REG_SP] < 0
282 || (mcp->__gregs[_REG_PC] & 1))
283 return EINVAL;
284
285 return 0;
286 }
287
288 int
289 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
290 {
291 struct trapframe * const tf = l->l_md.md_utf;
292 struct proc * const p = l->l_proc;
293 const __greg_t * const gr = mcp->__gregs;
294 int error;
295
296 /* Restore register context, if any. */
297 if (flags & _UC_CPU) {
298 error = cpu_mcontext_validate(l, mcp);
299 if (error)
300 return error;
301
302 /*
303 * Avoid updating TLS register here.
304 */
305 const __greg_t saved_tp = tf->tf_reg[_REG_TP];
306 tf->tf_regs = *(const struct reg *)gr;
307 tf->tf_reg[_REG_TP] = saved_tp;
308 }
309
310 /* Restore the private thread context */
311 if (flags & _UC_TLSBASE) {
312 lwp_setprivate(l, (void *)(intptr_t)mcp->__gregs[_X_TP]);
313 }
314
315 /* Restore floating point register context, if any. */
316 if (flags & _UC_FPU) {
317 KASSERT(l == curlwp);
318 /* Tell PCU we are replacing the FPU contents. */
319 fpu_replace(l);
320
321 /*
322 * The PCB FP regs struct includes the FP CSR, so use the
323 * proper size of fpreg when copying.
324 */
325 struct pcb * const pcb = lwp_getpcb(l);
326 pcb->pcb_fpregs = *(const struct fpreg *)mcp->__fregs;
327 }
328
329 mutex_enter(p->p_lock);
330 if (flags & _UC_SETSTACK)
331 l->l_sigstk.ss_flags |= SS_ONSTACK;
332 if (flags & _UC_CLRSTACK)
333 l->l_sigstk.ss_flags &= ~SS_ONSTACK;
334 mutex_exit(p->p_lock);
335
336 return 0;
337 }
338
339 void
340 cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
341 {
342 KASSERT(kpreempt_disabled());
343
344 if ((flags & RESCHED_KPREEMPT) != 0) {
345 #ifdef __HAVE_PREEMPTION
346 if ((flags & RESCHED_REMOTE) != 0) {
347 cpu_send_ipi(ci, IPI_KPREEMPT);
348 } else {
349 softint_trigger(SOFTINT_KPREEMPT);
350 }
351 #endif
352 return;
353 }
354 if ((flags & RESCHED_REMOTE) != 0) {
355 #ifdef MULTIPROCESSOR
356 cpu_send_ipi(ci, IPI_AST);
357 #endif
358 } else {
359 l->l_md.md_astpending = 1; /* force call to ast() */
360 }
361 }
362
363 void
364 cpu_signotify(struct lwp *l)
365 {
366 KASSERT(kpreempt_disabled());
367 #ifdef __HAVE_FAST_SOFTINTS
368 KASSERT(lwp_locked(l, NULL));
369 #endif
370
371 if (l->l_cpu != curcpu()) {
372 #ifdef MULTIPROCESSOR
373 cpu_send_ipi(l->l_cpu, IPI_AST);
374 #endif
375 } else {
376 l->l_md.md_astpending = 1; /* force call to ast() */
377 }
378 }
379
380 void
381 cpu_need_proftick(struct lwp *l)
382 {
383 KASSERT(kpreempt_disabled());
384 KASSERT(l->l_cpu == curcpu());
385
386 l->l_pflag |= LP_OWEUPC;
387 l->l_md.md_astpending = 1; /* force call to ast() */
388 }
389
390
391 /* Sync the discs, unmount the filesystems, and adjust the todr */
392 static void
393 bootsync(void)
394 {
395 static bool bootsyncdone = false;
396
397 if (bootsyncdone)
398 return;
399
400 bootsyncdone = true;
401
402 /* Make sure we can still manage to do things */
403 if ((csr_sstatus_read() & SR_SIE) == 0) {
404 /*
405 * If we get here then boot has been called without RB_NOSYNC
406 * and interrupts were disabled. This means the boot() call
407 * did not come from a user process e.g. shutdown, but must
408 * have come from somewhere in the kernel.
409 */
410 ENABLE_INTERRUPTS();
411 printf("Warning interrupts disabled during boot()\n");
412 }
413
414 vfs_shutdown();
415 }
416
417
418 void
419 cpu_reboot(int howto, char *bootstr)
420 {
421
422 /*
423 * If RB_NOSYNC was not specified sync the discs.
424 * Note: Unless cold is set to 1 here, syslogd will die during the
425 * unmount. It looks like syslogd is getting woken up only to find
426 * that it cannot page part of the binary in as the filesystem has
427 * been unmounted.
428 */
429 if ((howto & RB_NOSYNC) == 0)
430 bootsync();
431
432 #if 0
433 /* Disable interrupts. */
434 const int s = splhigh();
435
436 /* Do a dump if requested. */
437 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
438 dumpsys();
439
440 splx(s);
441 #endif
442
443 pmf_system_shutdown(boothowto);
444
445 /* Say NO to interrupts for good */
446 splhigh();
447
448 /* Run any shutdown hooks */
449 doshutdownhooks();
450
451 /* Make sure IRQ's are disabled */
452 DISABLE_INTERRUPTS();
453
454 if (howto & RB_HALT) {
455 printf("\n");
456 printf("The operating system has halted.\n");
457 printf("Please press any key to reboot.\n\n");
458 cnpollc(true); /* for proper keyboard command handling */
459 if (cngetc() == 0) {
460 /* no console attached, so just hlt */
461 printf("No keyboard - cannot reboot after all.\n");
462 goto spin;
463 }
464 cnpollc(false);
465 }
466
467 printf("rebooting...\n");
468
469 sbi_system_reset(SBI_RESET_TYPE_COLDREBOOT, SBI_RESET_REASON_NONE);
470 spin:
471 for (;;) {
472 asm volatile("wfi" ::: "memory");
473 }
474 /* NOTREACHED */
475 }
476
/*
 * Crash dump configuration hook.  Not implemented yet (TBD); the body is
 * intentionally empty.
 */
void
cpu_dumpconf(void)
{
	/* TBD!! */
}
482
483
484 int
485 cpu_lwp_setprivate(lwp_t *l, void *addr)
486 {
487 struct trapframe * const tf = lwp_trapframe(l);
488
489 tf->tf_reg[_REG_TP] = (register_t)addr;
490
491 return 0;
492 }
493
494
495 void
496 cpu_startup(void)
497 {
498 vaddr_t minaddr, maxaddr;
499 char pbuf[10]; /* "999999 MB" -- But Sv39 is max 512GB */
500
501 /*
502 * Good {morning,afternoon,evening,night}.
503 */
504 printf("%s%s", copyright, version);
505 format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
506 printf("total memory = %s\n", pbuf);
507
508 minaddr = 0;
509 /*
510 * Allocate a submap for physio.
511 */
512 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
513 VM_PHYS_SIZE, 0, FALSE, NULL);
514
515 format_bytes(pbuf, sizeof(pbuf), ptoa(uvm_availmem(false)));
516 printf("avail memory = %s\n", pbuf);
517
518 #ifdef MULTIPROCESSOR
519 kcpuset_create(&cpus_halted, true);
520 KASSERT(cpus_halted != NULL);
521
522 kcpuset_create(&cpus_hatched, true);
523 KASSERT(cpus_hatched != NULL);
524
525 kcpuset_create(&cpus_paused, true);
526 KASSERT(cpus_paused != NULL);
527
528 kcpuset_create(&cpus_resumed, true);
529 KASSERT(cpus_resumed != NULL);
530
531 kcpuset_create(&cpus_running, true);
532 KASSERT(cpus_running != NULL);
533
534 kcpuset_set(cpus_hatched, cpu_index(curcpu()));
535 kcpuset_set(cpus_running, cpu_index(curcpu()));
536 #endif
537
538 fdtbus_intr_init();
539
540 fdt_setup_rndseed();
541 fdt_setup_efirng();
542 }
543
544 static void
545 riscv_add_memory(const struct fdt_memory *m, void *arg)
546 {
547 paddr_t first = atop(m->start);
548 paddr_t last = atop(m->end);
549 int freelist = VM_FREELIST_DEFAULT;
550
551 VPRINTF("adding %#16" PRIxPADDR " - %#16" PRIxPADDR" to freelist %d\n",
552 m->start, m->end, freelist);
553
554 uvm_page_physload(first, last, first, last, freelist);
555 physmem += last - first;
556 }
557
558
559 static void
560 cpu_kernel_vm_init(paddr_t memory_start, paddr_t memory_end)
561 {
562 extern char __kernel_text[];
563 extern char _end[];
564
565 vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
566 vaddr_t kernend = round_page((vaddr_t)_end);
567 paddr_t kernstart_phys = KERN_VTOPHYS(kernstart);
568 paddr_t kernend_phys = KERN_VTOPHYS(kernend);
569
570 VPRINTF("%s: kernel phys start %#" PRIxPADDR " end %#" PRIxPADDR "\n",
571 __func__, kernstart_phys, kernend_phys);
572 fdt_memory_remove_range(kernstart_phys,
573 kernend_phys - kernstart_phys);
574
575 #if 0
576 /*
577 * Don't give these pages to UVM.
578 *
579 * cpu_kernel_vm_init need to create proper tables then the following
580 * will be true.
581 *
582 * Now we have APs started the pages used for stacks and L1PT can
583 * be given to uvm
584 */
585 extern char const __start__init_memory[];
586 extern char const __stop__init_memory[] __weak;
587 if (&__start__init_memory[0] != &__stop__init_memory[0]) {
588 const paddr_t spa = KERN_VTOPHYS((vaddr_t)__start__init_memory);
589 const paddr_t epa = KERN_VTOPHYS((vaddr_t)__stop__init_memory);
590
591 VPRINTF("%s: init phys start %#" PRIxPADDR
592 " end %#" PRIxPADDR "\n", __func__, spa, epa);
593 fdt_memory_remove_range(spa, epa - spa);
594 }
595 #endif
596
597 #ifdef _LP64
598 paddr_t pa = memory_start & ~XSEGOFSET;
599 pmap_direct_base = RISCV_DIRECTMAP_START;
600 extern pd_entry_t l2_pte[PAGE_SIZE / sizeof(pd_entry_t)];
601
602
603 const vsize_t vshift = XSEGSHIFT;
604 const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
605 const vsize_t inc = 1UL << vshift;
606
607 const vaddr_t sva = RISCV_DIRECTMAP_START + pa;
608 const vaddr_t eva = RISCV_DIRECTMAP_END;
609 const size_t sidx = (sva >> vshift) & pdetab_mask;
610 const size_t eidx = (eva >> vshift) & pdetab_mask;
611
612 /* Allocate gigapages covering all physical memory in the direct map. */
613 for (size_t i = sidx; i < eidx && pa < memory_end; i++, pa += inc) {
614 l2_pte[i] = PA_TO_PTE(pa) | PTE_KERN | PTE_HARDWIRED | PTE_RW;
615 VPRINTF("dm: %p : %#" PRIxPADDR "\n", &l2_pte[i], l2_pte[i]);
616 }
617 #endif
618 // pt_dump(printf);
619 }
620
621 static void
622 riscv_init_lwp0_uarea(void)
623 {
624 extern char lwp0uspace[];
625
626 uvm_lwp_setuarea(&lwp0, (vaddr_t)lwp0uspace);
627 memset(&lwp0.l_md, 0, sizeof(lwp0.l_md));
628 memset(lwp_getpcb(&lwp0), 0, sizeof(struct pcb));
629
630 struct trapframe *tf = (struct trapframe *)(lwp0uspace + USPACE) - 1;
631 memset(tf, 0, sizeof(*tf));
632
633 lwp0.l_md.md_utf = lwp0.l_md.md_ktf = tf;
634 }
635
636
637 static void
638 riscv_print_memory(const struct fdt_memory *m, void *arg)
639 {
640
641 VPRINTF("FDT /memory @ 0x%" PRIx64 " size 0x%" PRIx64 "\n",
642 m->start, m->end - m->start);
643 }
644
645
646 static void
647 parse_mi_bootargs(char *args)
648 {
649 int howto;
650 bool found, start, skipping;
651
652 if (args == NULL)
653 return;
654
655 start = true;
656 skipping = false;
657 for (char *cp = args; *cp; cp++) {
658 /* check for "words" starting with a "-" only */
659 if (start) {
660 if (*cp == '-') {
661 skipping = false;
662 } else {
663 skipping = true;
664 }
665 start = false;
666 continue;
667 }
668
669 if (*cp == ' ') {
670 start = true;
671 skipping = false;
672 continue;
673 }
674
675 if (skipping) {
676 continue;
677 }
678
679 /* Check valid boot flags */
680 howto = 0;
681 BOOT_FLAG(*cp, howto);
682 if (!howto)
683 printf("bootflag '%c' not recognised\n", *cp);
684 else
685 boothowto |= howto;
686 }
687
688 found = optstr_get(args, "root", bootdevstr, sizeof(bootdevstr));
689 if (found) {
690 bootspec = bootdevstr;
691 }
692 }
693
694
695 void
696 init_riscv(register_t hartid, paddr_t dtb)
697 {
698
699 /* set temporally to work printf()/panic() even before consinit() */
700 cn_tab = &earlycons;
701
702 /* Load FDT */
703 const vaddr_t dtbva = VM_KERNEL_DTB_BASE + (dtb & (NBSEG - 1));
704 void *fdt_data = (void *)dtbva;
705 int error = fdt_check_header(fdt_data);
706 if (error != 0)
707 panic("fdt_check_header failed: %s", fdt_strerror(error));
708
709 fdtbus_init(fdt_data);
710
711 /* Lookup platform specific backend */
712 const struct fdt_platform * const plat = fdt_platform_find();
713 if (plat == NULL)
714 panic("Kernel does not support this device");
715
716 /* Early console may be available, announce ourselves. */
717 VPRINTF("FDT<%p>\n", fdt_data);
718
719 boot_args = fdt_get_bootargs();
720
721 VPRINTF("devmap %p\n", plat->fp_devmap());
722 pmap_devmap_bootstrap(0, plat->fp_devmap());
723
724 VPRINTF("bootstrap\n");
725 plat->fp_bootstrap();
726
727 /*
728 * If stdout-path is specified on the command line, override the
729 * value in /chosen/stdout-path before initializing console.
730 */
731 VPRINTF("stdout\n");
732 fdt_update_stdout_path(fdt_data, boot_args);
733
734 /*
735 * Done making changes to the FDT.
736 */
737 fdt_pack(fdt_data);
738
739 const uint32_t dtbsize = round_page(fdt_totalsize(fdt_data));
740
741 VPRINTF("fdt size %x/%x\n", dtbsize, fdt_totalsize(fdt_data));
742
743 VPRINTF("consinit ");
744 consinit();
745 VPRINTF("ok\n");
746
747 /* Talk to the user */
748 printf("NetBSD/riscv (fdt) booting ...\n");
749
750 #ifdef BOOT_ARGS
751 char mi_bootargs[] = BOOT_ARGS;
752 parse_mi_bootargs(mi_bootargs);
753 #endif
754
755 uint64_t memory_start, memory_end;
756 fdt_memory_get(&memory_start, &memory_end);
757 physical_start = memory_start;
758 physical_end = memory_end;
759
760 fdt_memory_foreach(riscv_print_memory, NULL);
761
762 /* Cannot map memory above largest page number */
763 const uint64_t maxppn = __SHIFTOUT_MASK(PTE_PPN) - 1;
764 const uint64_t memory_limit = ptoa(maxppn);
765
766 if (memory_end > memory_limit) {
767 fdt_memory_remove_range(memory_limit, memory_end);
768 memory_end = memory_limit;
769 }
770
771 uint64_t memory_size __unused = memory_end - memory_start;
772
773 VPRINTF("%s: memory start %" PRIx64 " end %" PRIx64 " (len %"
774 PRIx64 ")\n", __func__, memory_start, memory_end, memory_size);
775
776 /* Parse ramdisk, rndseed, and firmware's RNG from EFI */
777 fdt_probe_initrd();
778 fdt_probe_rndseed();
779 fdt_probe_efirng();
780
781 fdt_memory_remove_reserved(memory_start, memory_end);
782
783 fdt_memory_remove_range(dtb, dtbsize);
784 fdt_reserve_initrd();
785 fdt_reserve_rndseed();
786 fdt_reserve_efirng();
787
788 /* Perform PT build and VM init */
789 cpu_kernel_vm_init(memory_start, memory_end);
790
791 VPRINTF("bootargs: %s\n", boot_args);
792
793 parse_mi_bootargs(boot_args);
794
795 #ifdef DDB
796 if (boothowto & RB_KDB) {
797 printf("Entering DDB...\n");
798 cpu_Debugger();
799 }
800 #endif
801
802 extern char __kernel_text[];
803 extern char _end[];
804 // extern char __data_start[];
805 // extern char __rodata_start[];
806
807 vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
808 vaddr_t kernend = round_page((vaddr_t)_end);
809 paddr_t kernstart_phys __unused = KERN_VTOPHYS(kernstart);
810 paddr_t kernend_phys __unused = KERN_VTOPHYS(kernend);
811
812 vaddr_t kernelvmstart;
813
814 vaddr_t kernstart_mega __unused = MEGAPAGE_TRUNC(kernstart);
815 vaddr_t kernend_mega = MEGAPAGE_ROUND(kernend);
816
817 kernelvmstart = kernend_mega;
818
819 #if 0
820 #ifdef MODULAR
821 #define MODULE_RESERVED_MAX (1024 * 1024 * 128)
822 #define MODULE_RESERVED_SIZE (1024 * 1024 * 32) /* good enough? */
823 module_start = kernelvmstart;
824 module_end = kernend_mega + MODULE_RESERVED_SIZE;
825 if (module_end >= kernstart_mega + MODULE_RESERVED_MAX)
826 module_end = kernstart_mega + MODULE_RESERVED_MAX;
827 KASSERT(module_end > kernend_mega);
828 kernelvmstart = module_end;
829 #endif /* MODULAR */
830 #endif
831 KASSERT(kernelvmstart < VM_KERNEL_VM_BASE);
832
833 kernelvmstart = VM_KERNEL_VM_BASE;
834
835 /*
836 * msgbuf is allocated from the top of the last biggest memory block.
837 */
838 paddr_t msgbufaddr = 0;
839
840 #ifdef _LP64
841 /* XXX check all ranges for last one with a big enough hole */
842 msgbufaddr = memory_end - MSGBUFSIZE;
843 KASSERT(msgbufaddr != 0); /* no space for msgbuf */
844 fdt_memory_remove_range(msgbufaddr, msgbufaddr + MSGBUFSIZE);
845 msgbufaddr = RISCV_PA_TO_KVA(msgbufaddr);
846 VPRINTF("msgbufaddr = %#lx\n", msgbufaddr);
847 initmsgbuf((void *)msgbufaddr, MSGBUFSIZE);
848 #endif
849
850 KASSERT(msgbufaddr != 0); /* no space for msgbuf */
851 #ifdef _LP64
852 initmsgbuf((void *)RISCV_PA_TO_KVA(msgbufaddr), MSGBUFSIZE);
853 #endif
854
855 #define DPRINTF(v) VPRINTF("%24s = 0x%16lx\n", #v, (unsigned long)v);
856
857 VPRINTF("------------------------------------------\n");
858 DPRINTF(kern_vtopdiff);
859 DPRINTF(memory_start);
860 DPRINTF(memory_end);
861 DPRINTF(memory_size);
862 DPRINTF(kernstart_phys);
863 DPRINTF(kernend_phys)
864 DPRINTF(msgbufaddr);
865 // DPRINTF(physical_end);
866 DPRINTF(VM_MIN_KERNEL_ADDRESS);
867 DPRINTF(kernstart_mega);
868 DPRINTF(kernstart);
869 DPRINTF(kernend);
870 DPRINTF(kernend_mega);
871 #if 0
872 #ifdef MODULAR
873 DPRINTF(module_start);
874 DPRINTF(module_end);
875 #endif
876 #endif
877 DPRINTF(VM_MAX_KERNEL_ADDRESS);
878 #ifdef _LP64
879 DPRINTF(pmap_direct_base);
880 #endif
881 VPRINTF("------------------------------------------\n");
882
883 #undef DPRINTF
884
885 uvm_md_init();
886
887 /*
888 * pass memory pages to uvm
889 */
890 physmem = 0;
891 fdt_memory_foreach(riscv_add_memory, NULL);
892
893 pmap_bootstrap(kernelvmstart, VM_MAX_KERNEL_ADDRESS);
894
895 kasan_init();
896
897 /* Finish setting up lwp0 on our end before we call main() */
898 riscv_init_lwp0_uarea();
899
900
901 error = 0;
902 if ((boothowto & RB_MD1) == 0) {
903 VPRINTF("mpstart\n");
904 if (plat->fp_mpstart)
905 error = plat->fp_mpstart();
906 }
907 if (error)
908 printf("AP startup problems\n");
909 }
910
911
912 #ifdef __HAVE_MM_MD_KERNACC
913
/*
 * IN_RANGE_P(addr, start, end) is true when start <= addr < end.
 * The expansion is fully parenthesised so that it can be negated or used
 * inside a larger expression.  Note that addr is evaluated twice.
 *
 * IN_DIRECTMAP_P(va) tests va against the direct map bounds on _LP64 and
 * is constant false otherwise.
 */
#define IN_RANGE_P(addr, start, end)	((start) <= (addr) && (addr) < (end))
#ifdef _LP64
#define IN_DIRECTMAP_P(va)	\
	IN_RANGE_P(va, RISCV_DIRECTMAP_START, RISCV_DIRECTMAP_END)
#else
#define IN_DIRECTMAP_P(va)	false
#endif
921
922 int
923 mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled)
924 {
925 extern char __kernel_text[];
926 extern char _end[];
927 extern char __data_start[];
928
929 const vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
930 const vaddr_t kernend = round_page((vaddr_t)_end);
931 const vaddr_t data_start = (vaddr_t)__data_start;
932
933 const vaddr_t va = (vaddr_t)ptr;
934
935 *handled = false;
936 if (IN_RANGE_P(va, kernstart, kernend)) {
937 *handled = true;
938 if (va < data_start && (prot & VM_PROT_WRITE) != 0) {
939 return EFAULT;
940 }
941 } else if (IN_DIRECTMAP_P(va)) {
942 *handled = true;
943 }
944
945 return 0;
946 }
947 #endif
948
949
950 #ifdef _LP64
951 static void
952 pte_bits(void (*pr)(const char *, ...), pt_entry_t pte)
953 {
954 (*pr)("%c%c%c%c%c%c%c%c",
955 (pte & PTE_D) ? 'D' : '.',
956 (pte & PTE_A) ? 'A' : '.',
957 (pte & PTE_G) ? 'G' : '.',
958 (pte & PTE_U) ? 'U' : '.',
959 (pte & PTE_X) ? 'X' : '.',
960 (pte & PTE_W) ? 'W' : '.',
961 (pte & PTE_R) ? 'R' : '.',
962 (pte & PTE_V) ? 'V' : '.');
963 }
964
965 static void
966 dump_ln_table(paddr_t pdp_pa, int topbit, int level, vaddr_t va,
967 void (*pr)(const char *, ...) __printflike(1, 2))
968 {
969 pd_entry_t *pdp = (void *)PMAP_DIRECT_MAP(pdp_pa);
970
971 (*pr)("l%u @ pa %#16" PRIxREGISTER "\n", level, pdp_pa);
972 for (size_t i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
973 pd_entry_t entry = pdp[i];
974
975 if (topbit) {
976 va = i << (PGSHIFT + level * SEGLENGTH);
977 if (va & __BIT(topbit)) {
978 va |= __BITS(63, topbit);
979 }
980 }
981 if (entry != 0) {
982 paddr_t pa = __SHIFTOUT(entry, PTE_PPN) << PGSHIFT;
983 // check level PPN bits.
984 if (PTE_ISLEAF_P(entry)) {
985 (*pr)("l%u %3zu va 0x%016lx pa 0x%012lx - ",
986 level, i, va, pa);
987 pte_bits(pr, entry);
988 (*pr)("\n");
989 } else {
990 (*pr)("l%u %3zu va 0x%016lx -> 0x%012lx - ",
991 level, i, va, pa);
992 pte_bits(pr, entry);
993 (*pr)("\n");
994 if (level == 0) {
995 (*pr)("wtf\n");
996 continue;
997 }
998 if (pte_pde_valid_p(entry))
999 dump_ln_table(pa, 0, level - 1, va, pr);
1000 }
1001 }
1002 va += 1UL << (PGSHIFT + level * SEGLENGTH);
1003 }
1004 }
1005
1006 void
1007 pt_dump(void (*pr)(const char *, ...) __printflike(1, 2))
1008 {
1009 const register_t satp = csr_satp_read();
1010 size_t topbit = sizeof(long) * NBBY - 1;
1011
1012 #ifdef _LP64
1013 const paddr_t satp_pa = __SHIFTOUT(satp, SATP_PPN) << PGSHIFT;
1014 const uint8_t mode = __SHIFTOUT(satp, SATP_MODE);
1015 u_int level = 1;
1016
1017 switch (mode) {
1018 case SATP_MODE_SV39:
1019 case SATP_MODE_SV48:
1020 topbit = (39 - 1) + (mode - 8) * SEGLENGTH;
1021 level = mode - 6;
1022 break;
1023 }
1024 #endif
1025 (*pr)("topbit = %zu\n", topbit);
1026
1027 (*pr)("satp = 0x%" PRIxREGISTER "\n", satp);
1028 #ifdef _LP64
1029 dump_ln_table(satp_pa, topbit, level, 0, pr);
1030 #endif
1031 }
1032 #endif
1033
1034 void
1035 consinit(void)
1036 {
1037 static bool initialized = false;
1038 const struct fdt_console *cons = fdtbus_get_console();
1039 const struct fdt_platform *plat = fdt_platform_find();
1040
1041 if (initialized || cons == NULL)
1042 return;
1043
1044 u_int uart_freq = 0;
1045 extern struct bus_space riscv_generic_bs_tag;
1046 struct fdt_attach_args faa = {
1047 .faa_bst = &riscv_generic_bs_tag,
1048 };
1049
1050 faa.faa_phandle = fdtbus_get_stdout_phandle();
1051 if (plat->fp_uart_freq != NULL)
1052 uart_freq = plat->fp_uart_freq();
1053
1054 cons->consinit(&faa, uart_freq);
1055
1056 initialized = true;
1057 }
1058