riscv_machdep.c revision 1.45 1 /* $NetBSD: riscv_machdep.c,v 1.45 2025/09/06 22:53:48 thorpej Exp $ */
2
3 /*-
4 * Copyright (c) 2014, 2019, 2022 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matt Thomas of 3am Software Foundry, and by Nick Hudson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "opt_ddb.h"
33 #include "opt_modular.h"
34 #include "opt_multiprocessor.h"
35 #include "opt_riscv_debug.h"
36
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: riscv_machdep.c,v 1.45 2025/09/06 22:53:48 thorpej Exp $");
39
40 #include <sys/param.h>
41
42 #include <sys/asan.h>
43 #include <sys/boot_flag.h>
44 #include <sys/cpu.h>
45 #include <sys/exec.h>
46 #include <sys/kmem.h>
47 #include <sys/ktrace.h>
48 #include <sys/lwp.h>
49 #include <sys/module.h>
50 #include <sys/mount.h>
51 #include <sys/msgbuf.h>
52 #include <sys/optstr.h>
53 #include <sys/proc.h>
54 #include <sys/reboot.h>
55 #include <sys/syscall.h>
56 #include <sys/sysctl.h>
57 #include <sys/systm.h>
58
59 #include <dev/cons.h>
60 #ifdef __HAVE_MM_MD_KERNACC
61 #include <dev/mm.h>
62 #endif
63
64 #include <uvm/uvm_extern.h>
65
66 #include <riscv/frame.h>
67 #include <riscv/locore.h>
68 #include <riscv/machdep.h>
69 #include <riscv/pte.h>
70 #include <riscv/sbi.h>
71 #include <riscv/userret.h>
72
73 #include <libfdt.h>
74 #include <dev/fdt/fdtvar.h>
75 #include <dev/fdt/fdt_boot.h>
76 #include <dev/fdt/fdt_console.h>
77 #include <dev/fdt/fdt_memory.h>
78 #include <dev/fdt/fdt_private.h>
79 #include <dev/fdt/fdt_platform.h>
80
81 int cpu_printfataltraps = 1;
82 char machine[] = MACHINE;
83 char machine_arch[] = MACHINE_ARCH;
84
85 #ifdef VERBOSE_INIT_RISCV
86 #define VPRINTF(...) printf(__VA_ARGS__)
87 #else
88 #define VPRINTF(...) __nothing
89 #endif
90
91 /* 64 should be enough, even for a ZFS UUID */
92 #define MAX_BOOT_DEV_STR 64
93
94 char bootdevstr[MAX_BOOT_DEV_STR] = "";
95 char *boot_args = NULL;
96
97 paddr_t physical_start;
98 paddr_t physical_end;
99
100 static void
101 earlyconsputc(dev_t dev, int c)
102 {
103 uartputc(c);
104 }
105
106 static int
107 earlyconsgetc(dev_t dev)
108 {
109 return uartgetc();
110 }
111
112 static struct consdev earlycons = {
113 .cn_putc = earlyconsputc,
114 .cn_getc = earlyconsgetc,
115 .cn_pollc = nullcnpollc,
116 };
117
118 struct vm_map *phys_map;
119
120 struct trapframe cpu_ddb_regs;
121 const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
122 #ifdef FPE
123 [PCU_FPU] = &pcu_fpu_ops,
124 #endif
125 };
126
127 /*
128 * Used by PHYSTOV and VTOPHYS -- Will be set be BSS is zeroed so
129 * keep it in data
130 */
131 unsigned long kern_vtopdiff __attribute__((__section__(".data")));
132
133
134 /*
135 * machine dependent system variables.
136 */
137 SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
138 {
139 sysctl_createv(clog, 0, NULL, NULL,
140 CTLFLAG_PERMANENT,
141 CTLTYPE_NODE, "machdep", NULL,
142 NULL, 0, NULL, 0,
143 CTL_MACHDEP, CTL_EOL);
144 }
145
#if defined(MODULAR)
/*
 * Machine-dependent module initialisation hook.
 *
 * Currently a no-op: nothing handed over by the boot loader is pushed
 * from here.
 */
void
module_init_md(void)
{
	/* Nothing to do. */
}
#endif /* MODULAR */
155
156 /*
157 * Set registers on exec.
158 * Clear all registers except sp, pc.
159 * sp is set to the stack pointer passed in. pc is set to the entry
160 * point given by the exec_package passed in.
161 */
162 void
163 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
164 {
165 struct trapframe * const tf = l->l_md.md_utf;
166 struct proc * const p = l->l_proc;
167
168 memset(tf, 0, sizeof(*tf));
169 tf->tf_sp = (intptr_t)stack_align(stack);
170 tf->tf_pc = (intptr_t)pack->ep_entry & ~1;
171 #ifdef _LP64
172 tf->tf_sr = (p->p_flag & PK_32) ? SR_USER32 : SR_USER64;
173 #else
174 tf->tf_sr = SR_USER;
175 #endif
176
177 // Set up arguments for ___start(cleanup, ps_strings)
178 tf->tf_a0 = 0; // cleanup
179 tf->tf_a1 = p->p_psstrp; // ps_strings
180
181 /*
182 * Must have interrupts disabled for exception return.
183 * Must be switching to user mode.
184 * Must enable interrupts after sret.
185 */
186 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
187 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
188 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
189 }
190
191 void
192 md_child_return(struct lwp *l)
193 {
194 struct trapframe * const tf = lwp_trapframe(l);
195
196 tf->tf_a0 = 0;
197 tf->tf_a1 = 1;
198 #ifdef FPE
199 /* Disable FP as we can't be using it (yet). */
200 tf->tf_sr &= ~SR_FS;
201 #endif
202
203 /*
204 * Must have interrupts disabled for exception return.
205 * Must be switching to user mode.
206 * Must enable interrupts after sret.
207 */
208
209 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
210 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
211 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
212
213 userret(l);
214 }
215
/*
 * Tail end of a posix_spawn() for the child: all that is needed here
 * is the normal return-to-user processing.
 */
void
cpu_spawn_return(struct lwp *l)
{

	userret(l);
}
224
225 /*
226 * Start a new LWP
227 */
228 void
229 startlwp(void *arg)
230 {
231 ucontext_t * const uc = arg;
232 lwp_t * const l = curlwp;
233 int error __diagused;
234
235 error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
236 KASSERT(error == 0);
237
238 kmem_free(uc, sizeof(*uc));
239 userret(l);
240 }
241
242 // We've worked hard to make sure struct reg and __gregset_t are the same.
243 // Ditto for struct fpreg and fregset_t.
244
245 #ifdef _LP64
246 CTASSERT(sizeof(struct reg) == sizeof(__gregset_t));
247 #endif
248 CTASSERT(sizeof(struct fpreg) == sizeof(__fregset_t));
249
250 void
251 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
252 {
253 const struct trapframe * const tf = l->l_md.md_utf;
254
255 /* Save register context. */
256 *(struct reg *)mcp->__gregs = tf->tf_regs;
257
258 *flags |= _UC_CPU | _UC_TLSBASE;
259
260 /* Save floating point register context, if any. */
261 KASSERT(l == curlwp);
262 if (fpu_valid_p(l)) {
263 /*
264 * If this process is the current FP owner, dump its
265 * context to the PCB first.
266 */
267 fpu_save(l);
268
269 struct pcb * const pcb = lwp_getpcb(l);
270 *(struct fpreg *)mcp->__fregs = pcb->pcb_fpregs;
271 *flags |= _UC_FPU;
272 }
273 }
274
275 int
276 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
277 {
278 /*
279 * Verify that at least the PC and SP are user addresses.
280 */
281 if ((intptr_t) mcp->__gregs[_REG_PC] < 0
282 || (intptr_t) mcp->__gregs[_REG_SP] < 0
283 || (mcp->__gregs[_REG_PC] & 1))
284 return EINVAL;
285
286 return 0;
287 }
288
289 int
290 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
291 {
292 struct trapframe * const tf = l->l_md.md_utf;
293 struct proc * const p = l->l_proc;
294 const __greg_t * const gr = mcp->__gregs;
295 int error;
296
297 /* Restore register context, if any. */
298 if (flags & _UC_CPU) {
299 error = cpu_mcontext_validate(l, mcp);
300 if (error)
301 return error;
302
303 /*
304 * Avoid updating TLS register here.
305 */
306 const __greg_t saved_tp = tf->tf_reg[_REG_TP];
307 tf->tf_regs = *(const struct reg *)gr;
308 tf->tf_reg[_REG_TP] = saved_tp;
309 }
310
311 /* Restore the private thread context */
312 if (flags & _UC_TLSBASE) {
313 lwp_setprivate(l, (void *)(intptr_t)mcp->__gregs[_X_TP]);
314 }
315
316 /* Restore floating point register context, if any. */
317 if (flags & _UC_FPU) {
318 KASSERT(l == curlwp);
319 /* Tell PCU we are replacing the FPU contents. */
320 fpu_replace(l);
321
322 /*
323 * The PCB FP regs struct includes the FP CSR, so use the
324 * proper size of fpreg when copying.
325 */
326 struct pcb * const pcb = lwp_getpcb(l);
327 pcb->pcb_fpregs = *(const struct fpreg *)mcp->__fregs;
328 }
329
330 mutex_enter(p->p_lock);
331 if (flags & _UC_SETSTACK)
332 l->l_sigstk.ss_flags |= SS_ONSTACK;
333 if (flags & _UC_CLRSTACK)
334 l->l_sigstk.ss_flags &= ~SS_ONSTACK;
335 mutex_exit(p->p_lock);
336
337 return 0;
338 }
339
340 void
341 cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
342 {
343 KASSERT(kpreempt_disabled());
344
345 if ((flags & RESCHED_KPREEMPT) != 0) {
346 #ifdef __HAVE_PREEMPTION
347 if ((flags & RESCHED_REMOTE) != 0) {
348 cpu_send_ipi(ci, IPI_KPREEMPT);
349 } else {
350 softint_trigger(SOFTINT_KPREEMPT);
351 }
352 #endif
353 return;
354 }
355 if ((flags & RESCHED_REMOTE) != 0) {
356 #ifdef MULTIPROCESSOR
357 cpu_send_ipi(ci, IPI_AST);
358 #endif
359 } else {
360 l->l_md.md_astpending = 1; /* force call to ast() */
361 }
362 }
363
364 void
365 cpu_signotify(struct lwp *l)
366 {
367 KASSERT(kpreempt_disabled());
368 #ifdef __HAVE_FAST_SOFTINTS
369 KASSERT(lwp_locked(l, NULL));
370 #endif
371
372 if (l->l_cpu != curcpu()) {
373 #ifdef MULTIPROCESSOR
374 cpu_send_ipi(l->l_cpu, IPI_AST);
375 #endif
376 } else {
377 l->l_md.md_astpending = 1; /* force call to ast() */
378 }
379 }
380
381 void
382 cpu_need_proftick(struct lwp *l)
383 {
384 KASSERT(kpreempt_disabled());
385 KASSERT(l->l_cpu == curcpu());
386
387 l->l_pflag |= LP_OWEUPC;
388 l->l_md.md_astpending = 1; /* force call to ast() */
389 }
390
391
392 /* Sync the discs, unmount the filesystems, and adjust the todr */
393 static void
394 bootsync(void)
395 {
396 static bool bootsyncdone = false;
397
398 if (bootsyncdone)
399 return;
400
401 bootsyncdone = true;
402
403 /* Make sure we can still manage to do things */
404 if ((csr_sstatus_read() & SR_SIE) == 0) {
405 /*
406 * If we get here then boot has been called without RB_NOSYNC
407 * and interrupts were disabled. This means the boot() call
408 * did not come from a user process e.g. shutdown, but must
409 * have come from somewhere in the kernel.
410 */
411 ENABLE_INTERRUPTS();
412 printf("Warning interrupts disabled during boot()\n");
413 }
414
415 vfs_shutdown();
416 }
417
418
419 void
420 cpu_reboot(int howto, char *bootstr)
421 {
422
423 /*
424 * If RB_NOSYNC was not specified sync the discs.
425 * Note: Unless cold is set to 1 here, syslogd will die during the
426 * unmount. It looks like syslogd is getting woken up only to find
427 * that it cannot page part of the binary in as the filesystem has
428 * been unmounted.
429 */
430 if ((howto & RB_NOSYNC) == 0)
431 bootsync();
432
433 #if 0
434 /* Disable interrupts. */
435 const int s = splhigh();
436
437 /* Do a dump if requested. */
438 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
439 dumpsys();
440
441 splx(s);
442 #endif
443
444 pmf_system_shutdown(boothowto);
445
446 /* Say NO to interrupts for good */
447 splhigh();
448
449 /* Run any shutdown hooks */
450 doshutdownhooks();
451
452 /* Make sure IRQ's are disabled */
453 DISABLE_INTERRUPTS();
454
455 if (howto & RB_HALT) {
456 printf("\n");
457 printf("The operating system has halted.\n");
458 printf("Please press any key to reboot.\n\n");
459 cnpollc(true); /* for proper keyboard command handling */
460 if (cngetc() == 0) {
461 /* no console attached, so just hlt */
462 printf("No keyboard - cannot reboot after all.\n");
463 goto spin;
464 }
465 cnpollc(false);
466 }
467
468 printf("rebooting...\n");
469
470 sbi_system_reset(SBI_RESET_TYPE_COLDREBOOT, SBI_RESET_REASON_NONE);
471 spin:
472 for (;;) {
473 asm volatile("wfi" ::: "memory");
474 }
475 /* NOTREACHED */
476 }
477
/*
 * Crash dump configuration hook.  Not implemented yet.
 */
void
cpu_dumpconf(void)
{
	/* TBD!! */
}
483
484
485 int
486 cpu_lwp_setprivate(lwp_t *l, void *addr)
487 {
488 struct trapframe * const tf = lwp_trapframe(l);
489
490 tf->tf_reg[_REG_TP] = (register_t)addr;
491
492 return 0;
493 }
494
495
496 void
497 cpu_startup(void)
498 {
499 vaddr_t minaddr, maxaddr;
500 char pbuf[10]; /* "999999 MB" -- But Sv39 is max 512GB */
501
502 /*
503 * Good {morning,afternoon,evening,night}.
504 */
505 printf("%s%s", copyright, version);
506 format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
507 printf("total memory = %s\n", pbuf);
508
509 minaddr = 0;
510 /*
511 * Allocate a submap for physio.
512 */
513 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
514 VM_PHYS_SIZE, 0, FALSE, NULL);
515
516 format_bytes(pbuf, sizeof(pbuf), ptoa(uvm_availmem(false)));
517 printf("avail memory = %s\n", pbuf);
518
519 #ifdef MULTIPROCESSOR
520 kcpuset_create(&cpus_halted, true);
521 KASSERT(cpus_halted != NULL);
522
523 kcpuset_create(&cpus_hatched, true);
524 KASSERT(cpus_hatched != NULL);
525
526 kcpuset_create(&cpus_paused, true);
527 KASSERT(cpus_paused != NULL);
528
529 kcpuset_create(&cpus_resumed, true);
530 KASSERT(cpus_resumed != NULL);
531
532 kcpuset_create(&cpus_running, true);
533 KASSERT(cpus_running != NULL);
534
535 kcpuset_set(cpus_hatched, cpu_index(curcpu()));
536 kcpuset_set(cpus_running, cpu_index(curcpu()));
537 #endif
538
539 fdtbus_intr_init();
540
541 fdt_setup_rndseed();
542 fdt_setup_efirng();
543 }
544
545 static void
546 riscv_add_memory(const struct fdt_memory *m, void *arg)
547 {
548 paddr_t first = atop(m->start);
549 paddr_t last = atop(m->end);
550 int freelist = VM_FREELIST_DEFAULT;
551
552 VPRINTF("adding %#16" PRIxPADDR " - %#16" PRIxPADDR" to freelist %d\n",
553 m->start, m->end, freelist);
554
555 uvm_page_physload(first, last, first, last, freelist);
556 physmem += last - first;
557 }
558
559
560 static void
561 cpu_kernel_vm_init(paddr_t memory_start, paddr_t memory_end)
562 {
563 extern char __kernel_text[];
564 extern char _end[];
565
566 vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
567 vaddr_t kernend = round_page((vaddr_t)_end);
568 paddr_t kernstart_phys = KERN_VTOPHYS(kernstart);
569 paddr_t kernend_phys = KERN_VTOPHYS(kernend);
570
571 VPRINTF("%s: kernel phys start %#" PRIxPADDR " end %#" PRIxPADDR "\n",
572 __func__, kernstart_phys, kernend_phys);
573 fdt_memory_remove_range(kernstart_phys,
574 kernend_phys - kernstart_phys);
575
576 #if 0
577 /*
578 * Don't give these pages to UVM.
579 *
580 * cpu_kernel_vm_init need to create proper tables then the following
581 * will be true.
582 *
583 * Now we have APs started the pages used for stacks and L1PT can
584 * be given to uvm
585 */
586 extern char const __start__init_memory[];
587 extern char const __stop__init_memory[] __weak;
588 if (&__start__init_memory[0] != &__stop__init_memory[0]) {
589 const paddr_t spa = KERN_VTOPHYS((vaddr_t)__start__init_memory);
590 const paddr_t epa = KERN_VTOPHYS((vaddr_t)__stop__init_memory);
591
592 VPRINTF("%s: init phys start %#" PRIxPADDR
593 " end %#" PRIxPADDR "\n", __func__, spa, epa);
594 fdt_memory_remove_range(spa, epa - spa);
595 }
596 #endif
597
598 #ifdef _LP64
599 paddr_t pa = memory_start & ~XSEGOFSET;
600 pmap_direct_base = RISCV_DIRECTMAP_START;
601 extern pd_entry_t l2_pte[PAGE_SIZE / sizeof(pd_entry_t)];
602
603
604 const vsize_t vshift = XSEGSHIFT;
605 const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
606 const vsize_t inc = 1UL << vshift;
607
608 const vaddr_t sva = RISCV_DIRECTMAP_START + pa;
609 const vaddr_t eva = RISCV_DIRECTMAP_END;
610 const size_t sidx = (sva >> vshift) & pdetab_mask;
611 const size_t eidx = (eva >> vshift) & pdetab_mask;
612
613 /* Allocate gigapages covering all physical memory in the direct map. */
614 for (size_t i = sidx; i < eidx && pa < memory_end; i++, pa += inc) {
615 l2_pte[i] = PA_TO_PTE(pa) | PTE_KERN | PTE_HARDWIRED | PTE_RW;
616 VPRINTF("dm: %p : %#" PRIxPADDR "\n", &l2_pte[i], l2_pte[i]);
617 }
618 #endif
619 // pt_dump(printf);
620 }
621
622 static void
623 riscv_init_lwp0_uarea(void)
624 {
625 extern char lwp0uspace[];
626
627 uvm_lwp_setuarea(&lwp0, (vaddr_t)lwp0uspace);
628 memset(&lwp0.l_md, 0, sizeof(lwp0.l_md));
629 memset(lwp_getpcb(&lwp0), 0, sizeof(struct pcb));
630
631 struct trapframe *tf = (struct trapframe *)(lwp0uspace + USPACE) - 1;
632 memset(tf, 0, sizeof(*tf));
633
634 lwp0.l_md.md_utf = lwp0.l_md.md_ktf = tf;
635 }
636
637
638 static void
639 riscv_print_memory(const struct fdt_memory *m, void *arg)
640 {
641
642 VPRINTF("FDT /memory @ 0x%" PRIx64 " size 0x%" PRIx64 "\n",
643 m->start, m->end - m->start);
644 }
645
646
647 static void
648 parse_mi_bootargs(char *args)
649 {
650 int howto;
651 bool found, start, skipping;
652
653 if (args == NULL)
654 return;
655
656 start = true;
657 skipping = false;
658 for (char *cp = args; *cp; cp++) {
659 /* check for "words" starting with a "-" only */
660 if (start) {
661 if (*cp == '-') {
662 skipping = false;
663 } else {
664 skipping = true;
665 }
666 start = false;
667 continue;
668 }
669
670 if (*cp == ' ') {
671 start = true;
672 skipping = false;
673 continue;
674 }
675
676 if (skipping) {
677 continue;
678 }
679
680 /* Check valid boot flags */
681 howto = 0;
682 BOOT_FLAG(*cp, howto);
683 if (!howto)
684 printf("bootflag '%c' not recognised\n", *cp);
685 else
686 boothowto |= howto;
687 }
688
689 found = optstr_get(args, "root", bootdevstr, sizeof(bootdevstr));
690 if (found) {
691 bootspec = bootdevstr;
692 }
693 }
694
695
696 void
697 init_riscv(register_t hartid, paddr_t dtb)
698 {
699
700 /* set temporally to work printf()/panic() even before consinit() */
701 cn_tab = &earlycons;
702
703 /* Load FDT */
704 const vaddr_t dtbva = VM_KERNEL_DTB_BASE + (dtb & (NBSEG - 1));
705 void *fdt_data = (void *)dtbva;
706 int error = fdt_check_header(fdt_data);
707 if (error != 0)
708 panic("fdt_check_header failed: %s", fdt_strerror(error));
709
710 fdtbus_init(fdt_data);
711
712 /* Lookup platform specific backend */
713 const struct fdt_platform * const plat = fdt_platform_find();
714 if (plat == NULL)
715 panic("Kernel does not support this device");
716
717 /* Early console may be available, announce ourselves. */
718 VPRINTF("FDT<%p>\n", fdt_data);
719
720 boot_args = fdt_get_bootargs();
721
722 VPRINTF("devmap %p\n", plat->fp_devmap());
723 pmap_devmap_bootstrap(0, plat->fp_devmap());
724
725 VPRINTF("bootstrap\n");
726 plat->fp_bootstrap();
727
728 /*
729 * If stdout-path is specified on the command line, override the
730 * value in /chosen/stdout-path before initializing console.
731 */
732 VPRINTF("stdout\n");
733 fdt_update_stdout_path(fdt_data, boot_args);
734
735 /*
736 * Done making changes to the FDT.
737 */
738 fdt_pack(fdt_data);
739
740 const uint32_t dtbsize = round_page(fdt_totalsize(fdt_data));
741
742 VPRINTF("fdt size %x/%x\n", dtbsize, fdt_totalsize(fdt_data));
743
744 VPRINTF("consinit ");
745 consinit();
746 VPRINTF("ok\n");
747
748 /* Talk to the user */
749 printf("NetBSD/riscv (fdt) booting ...\n");
750
751 #ifdef BOOT_ARGS
752 char mi_bootargs[] = BOOT_ARGS;
753 parse_mi_bootargs(mi_bootargs);
754 #endif
755
756 uint64_t memory_start, memory_end;
757 fdt_memory_get(&memory_start, &memory_end);
758 physical_start = memory_start;
759 physical_end = memory_end;
760
761 fdt_memory_foreach(riscv_print_memory, NULL);
762
763 /* Cannot map memory above largest page number */
764 const uint64_t maxppn = __SHIFTOUT_MASK(PTE_PPN) - 1;
765 const uint64_t memory_limit = ptoa(maxppn);
766
767 if (memory_end > memory_limit) {
768 fdt_memory_remove_range(memory_limit, memory_end);
769 memory_end = memory_limit;
770 }
771
772 uint64_t memory_size __unused = memory_end - memory_start;
773
774 VPRINTF("%s: memory start %" PRIx64 " end %" PRIx64 " (len %"
775 PRIx64 ")\n", __func__, memory_start, memory_end, memory_size);
776
777 /* Parse ramdisk, rndseed, and firmware's RNG from EFI */
778 fdt_probe_initrd();
779 fdt_probe_rndseed();
780 fdt_probe_efirng();
781
782 fdt_memory_remove_reserved(memory_start, memory_end);
783
784 fdt_memory_remove_range(dtb, dtbsize);
785 fdt_reserve_initrd();
786 fdt_reserve_rndseed();
787 fdt_reserve_efirng();
788
789 /* Perform PT build and VM init */
790 cpu_kernel_vm_init(memory_start, memory_end);
791
792 VPRINTF("bootargs: %s\n", boot_args);
793
794 parse_mi_bootargs(boot_args);
795
796 #ifdef DDB
797 if (boothowto & RB_KDB) {
798 printf("Entering DDB...\n");
799 cpu_Debugger();
800 }
801 #endif
802
803 extern char __kernel_text[];
804 extern char _end[];
805 // extern char __data_start[];
806 // extern char __rodata_start[];
807
808 vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
809 vaddr_t kernend = round_page((vaddr_t)_end);
810 paddr_t kernstart_phys __unused = KERN_VTOPHYS(kernstart);
811 paddr_t kernend_phys __unused = KERN_VTOPHYS(kernend);
812
813 vaddr_t kernelvmstart;
814
815 vaddr_t kernstart_mega __unused = MEGAPAGE_TRUNC(kernstart);
816 vaddr_t kernend_mega = MEGAPAGE_ROUND(kernend);
817
818 kernelvmstart = kernend_mega;
819
820 #if 0
821 #ifdef MODULAR
822 #define MODULE_RESERVED_MAX (1024 * 1024 * 128)
823 #define MODULE_RESERVED_SIZE (1024 * 1024 * 32) /* good enough? */
824 module_start = kernelvmstart;
825 module_end = kernend_mega + MODULE_RESERVED_SIZE;
826 if (module_end >= kernstart_mega + MODULE_RESERVED_MAX)
827 module_end = kernstart_mega + MODULE_RESERVED_MAX;
828 KASSERT(module_end > kernend_mega);
829 kernelvmstart = module_end;
830 #endif /* MODULAR */
831 #endif
832 KASSERT(kernelvmstart < VM_KERNEL_VM_BASE);
833
834 kernelvmstart = VM_KERNEL_VM_BASE;
835
836 /*
837 * msgbuf is allocated from the top of the last biggest memory block.
838 */
839 paddr_t msgbufaddr = 0;
840
841 #ifdef _LP64
842 /* XXX check all ranges for last one with a big enough hole */
843 msgbufaddr = memory_end - MSGBUFSIZE;
844 KASSERT(msgbufaddr != 0); /* no space for msgbuf */
845 fdt_memory_remove_range(msgbufaddr, msgbufaddr + MSGBUFSIZE);
846 msgbufaddr = RISCV_PA_TO_KVA(msgbufaddr);
847 VPRINTF("msgbufaddr = %#lx\n", msgbufaddr);
848 initmsgbuf((void *)msgbufaddr, MSGBUFSIZE);
849 #endif
850
851 KASSERT(msgbufaddr != 0); /* no space for msgbuf */
852 #ifdef _LP64
853 initmsgbuf((void *)RISCV_PA_TO_KVA(msgbufaddr), MSGBUFSIZE);
854 #endif
855
856 #define DPRINTF(v) VPRINTF("%24s = 0x%16lx\n", #v, (unsigned long)v);
857
858 VPRINTF("------------------------------------------\n");
859 DPRINTF(kern_vtopdiff);
860 DPRINTF(memory_start);
861 DPRINTF(memory_end);
862 DPRINTF(memory_size);
863 DPRINTF(kernstart_phys);
864 DPRINTF(kernend_phys)
865 DPRINTF(msgbufaddr);
866 // DPRINTF(physical_end);
867 DPRINTF(VM_MIN_KERNEL_ADDRESS);
868 DPRINTF(kernstart_mega);
869 DPRINTF(kernstart);
870 DPRINTF(kernend);
871 DPRINTF(kernend_mega);
872 #if 0
873 #ifdef MODULAR
874 DPRINTF(module_start);
875 DPRINTF(module_end);
876 #endif
877 #endif
878 DPRINTF(VM_MAX_KERNEL_ADDRESS);
879 #ifdef _LP64
880 DPRINTF(pmap_direct_base);
881 #endif
882 VPRINTF("------------------------------------------\n");
883
884 #undef DPRINTF
885
886 uvm_md_init();
887
888 /*
889 * pass memory pages to uvm
890 */
891 physmem = 0;
892 fdt_memory_foreach(riscv_add_memory, NULL);
893
894 pmap_bootstrap(kernelvmstart, VM_MAX_KERNEL_ADDRESS);
895
896 kasan_init();
897
898 /* Finish setting up lwp0 on our end before we call main() */
899 riscv_init_lwp0_uarea();
900
901
902 error = 0;
903 if ((boothowto & RB_MD1) == 0) {
904 VPRINTF("mpstart\n");
905 if (plat->fp_mpstart)
906 error = plat->fp_mpstart();
907 }
908 if (error)
909 printf("AP startup problems\n");
910 }
911
912
913 #ifdef __HAVE_MM_MD_KERNACC
914
/*
 * IN_RANGE_P(addr, start, end) is true when start <= addr < end.
 * The expansion is fully parenthesized so it can be negated or combined
 * with other operators safely.  Arguments are evaluated more than once.
 *
 * IN_DIRECTMAP_P(va) tests va against the direct map bounds on LP64 and
 * is always false otherwise.
 */
#define IN_RANGE_P(addr, start, end)	\
	((start) <= (addr) && (addr) < (end))
#ifdef _LP64
#define IN_DIRECTMAP_P(va)	\
	IN_RANGE_P(va, RISCV_DIRECTMAP_START, RISCV_DIRECTMAP_END)
#else
#define IN_DIRECTMAP_P(va)	false
#endif
922
923 int
924 mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled)
925 {
926 extern char __kernel_text[];
927 extern char _end[];
928 extern char __data_start[];
929
930 const vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
931 const vaddr_t kernend = round_page((vaddr_t)_end);
932 const vaddr_t data_start = (vaddr_t)__data_start;
933
934 const vaddr_t va = (vaddr_t)ptr;
935
936 *handled = false;
937 if (IN_RANGE_P(va, kernstart, kernend)) {
938 *handled = true;
939 if (va < data_start && (prot & VM_PROT_WRITE) != 0) {
940 return EFAULT;
941 }
942 } else if (IN_DIRECTMAP_P(va)) {
943 *handled = true;
944 }
945
946 return 0;
947 }
948 #endif
949
950
951 #ifdef _LP64
952 static void
953 pte_bits(void (*pr)(const char *, ...), pt_entry_t pte)
954 {
955 (*pr)("%c%c%c%c%c%c%c%c",
956 (pte & PTE_D) ? 'D' : '.',
957 (pte & PTE_A) ? 'A' : '.',
958 (pte & PTE_G) ? 'G' : '.',
959 (pte & PTE_U) ? 'U' : '.',
960 (pte & PTE_X) ? 'X' : '.',
961 (pte & PTE_W) ? 'W' : '.',
962 (pte & PTE_R) ? 'R' : '.',
963 (pte & PTE_V) ? 'V' : '.');
964 }
965
966 static void
967 dump_ln_table(paddr_t pdp_pa, int topbit, int level, vaddr_t va,
968 void (*pr)(const char *, ...) __printflike(1, 2))
969 {
970 pd_entry_t *pdp = (void *)PMAP_DIRECT_MAP(pdp_pa);
971
972 (*pr)("l%u @ pa %#16" PRIxREGISTER "\n", level, pdp_pa);
973 for (size_t i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
974 pd_entry_t entry = pdp[i];
975
976 if (topbit) {
977 va = i << (PGSHIFT + level * SEGLENGTH);
978 if (va & __BIT(topbit)) {
979 va |= __BITS(63, topbit);
980 }
981 }
982 if (entry != 0) {
983 paddr_t pa = __SHIFTOUT(entry, PTE_PPN) << PGSHIFT;
984 // check level PPN bits.
985 if (PTE_ISLEAF_P(entry)) {
986 (*pr)("l%u %3zu va 0x%016lx pa 0x%012lx - ",
987 level, i, va, pa);
988 pte_bits(pr, entry);
989 (*pr)("\n");
990 } else {
991 (*pr)("l%u %3zu va 0x%016lx -> 0x%012lx - ",
992 level, i, va, pa);
993 pte_bits(pr, entry);
994 (*pr)("\n");
995 if (level == 0) {
996 (*pr)("wtf\n");
997 continue;
998 }
999 if (pte_pde_valid_p(entry))
1000 dump_ln_table(pa, 0, level - 1, va, pr);
1001 }
1002 }
1003 va += 1UL << (PGSHIFT + level * SEGLENGTH);
1004 }
1005 }
1006
1007 void
1008 pt_dump(void (*pr)(const char *, ...) __printflike(1, 2))
1009 {
1010 const register_t satp = csr_satp_read();
1011 size_t topbit = sizeof(long) * NBBY - 1;
1012
1013 #ifdef _LP64
1014 const paddr_t satp_pa = __SHIFTOUT(satp, SATP_PPN) << PGSHIFT;
1015 const uint8_t mode = __SHIFTOUT(satp, SATP_MODE);
1016 u_int level = 1;
1017
1018 switch (mode) {
1019 case SATP_MODE_SV39:
1020 case SATP_MODE_SV48:
1021 topbit = (39 - 1) + (mode - 8) * SEGLENGTH;
1022 level = mode - 6;
1023 break;
1024 }
1025 #endif
1026 (*pr)("topbit = %zu\n", topbit);
1027
1028 (*pr)("satp = 0x%" PRIxREGISTER "\n", satp);
1029 #ifdef _LP64
1030 dump_ln_table(satp_pa, topbit, level, 0, pr);
1031 #endif
1032 }
1033 #endif
1034
1035 void
1036 consinit(void)
1037 {
1038 static bool initialized = false;
1039 const struct fdt_console *cons = fdtbus_get_console();
1040 const struct fdt_platform *plat = fdt_platform_find();
1041
1042 if (initialized || cons == NULL)
1043 return;
1044
1045 u_int uart_freq = 0;
1046 extern struct bus_space riscv_generic_bs_tag;
1047 struct fdt_attach_args faa = {
1048 .faa_bst = &riscv_generic_bs_tag,
1049 };
1050
1051 faa.faa_phandle = fdtbus_get_stdout_phandle();
1052 if (plat->fp_uart_freq != NULL)
1053 uart_freq = plat->fp_uart_freq();
1054
1055 cons->consinit(&faa, uart_freq);
1056
1057 initialized = true;
1058 }
1059