/* $NetBSD: riscv_machdep.c,v 1.43 2025/03/02 08:14:26 skrll Exp $ */

/*-
 * Copyright (c) 2014, 2019, 2022 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry, and by Nick Hudson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_ddb.h"
#include "opt_modular.h"
#include "opt_multiprocessor.h"
#include "opt_riscv_debug.h"

#include <sys/cdefs.h>
__RCSID("$NetBSD: riscv_machdep.c,v 1.43 2025/03/02 08:14:26 skrll Exp $");

#include <sys/param.h>

#include <sys/asan.h>
#include <sys/boot_flag.h>
#include <sys/cpu.h>
#include <sys/exec.h>
#include <sys/kmem.h>
#include <sys/ktrace.h>
#include <sys/lwp.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/msgbuf.h>
#include <sys/optstr.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <dev/cons.h>
#ifdef __HAVE_MM_MD_KERNACC
#include <dev/mm.h>
#endif

#include <uvm/uvm_extern.h>

#include <riscv/frame.h>
#include <riscv/locore.h>
#include <riscv/machdep.h>
#include <riscv/pte.h>
#include <riscv/sbi.h>
#include <riscv/userret.h>

#include <libfdt.h>
#include <dev/fdt/fdtvar.h>
#include <dev/fdt/fdt_boot.h>
#include <dev/fdt/fdt_memory.h>
#include <dev/fdt/fdt_private.h>

int cpu_printfataltraps = 1;
char machine[] = MACHINE;
char machine_arch[] = MACHINE_ARCH;

#ifdef VERBOSE_INIT_RISCV
#define VPRINTF(...)	printf(__VA_ARGS__)
#else
#define VPRINTF(...)	__nothing
#endif

/* 64 should be enough, even for a ZFS UUID */
#define MAX_BOOT_DEV_STR	64

char bootdevstr[MAX_BOOT_DEV_STR] = "";
char *boot_args = NULL;

paddr_t physical_start;
paddr_t physical_end;

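/*
 * Early polled console: until consinit() attaches the real FDT console,
 * cn_tab points at this, so printf()/panic() output goes through
 * uartputc()/uartgetc().
 */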
static void
earlyconsputc(dev_t dev, int c)
{
	uartputc(c);
}

static int
earlyconsgetc(dev_t dev)
{
	return uartgetc();
}

static struct consdev earlycons = {
	.cn_putc = earlyconsputc,
	.cn_getc = earlyconsgetc,
	.cn_pollc = nullcnpollc,
};

struct vm_map *phys_map;

struct trapframe cpu_ddb_regs;
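/*
 * pcu(9) handlers for lazily switched per-LWP state; the FPU is the
 * only unit, and only when FPE is configured.
 */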
const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
#ifdef FPE
	[PCU_FPU] = &pcu_fpu_ops,
#endif
};

/*
 * Used by PHYSTOV and VTOPHYS -- it is set before the BSS is zeroed,
 * so keep it in the data segment.
 */
unsigned long kern_vtopdiff __attribute__((__section__(".data")));


/*
 * machine dependent system variables.
 */
SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_EOL);
}

#ifdef MODULAR
/*
 * Push any modules loaded by the boot loader.
 */
void
module_init_md(void)
{
}
#endif /* MODULAR */

/*
 * Set registers on exec.
 * Clear all registers except sp, pc.
 * sp is set to the stack pointer passed in. pc is set to the entry
 * point given by the exec_package passed in.
 */
void
setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
{
	struct trapframe * const tf = l->l_md.md_utf;
	struct proc * const p = l->l_proc;

	memset(tf, 0, sizeof(*tf));
	tf->tf_sp = (intptr_t)stack_align(stack);
	tf->tf_pc = (intptr_t)pack->ep_entry & ~1;
#ifdef _LP64
	tf->tf_sr = (p->p_flag & PK_32) ? SR_USER32 : SR_USER64;
#else
	tf->tf_sr = SR_USER;
#endif

	// Set up arguments for ___start(cleanup, ps_strings)
	tf->tf_a0 = 0;			// cleanup
	tf->tf_a1 = p->p_psstrp;	// ps_strings

	/*
	 * Must have interrupts disabled for exception return.
	 * Must be switching to user mode.
	 * Must enable interrupts after sret.
	 */
	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
}

void
md_child_return(struct lwp *l)
{
	struct trapframe * const tf = lwp_trapframe(l);

	tf->tf_a0 = 0;
	tf->tf_a1 = 1;
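	/* (a0, a1) = (0, 1): the child side of fork() returns zero. */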
#ifdef FPE
	/* Disable FP as we can't be using it (yet). */
	tf->tf_sr &= ~SR_FS;
#endif

	/*
	 * Must have interrupts disabled for exception return.
	 * Must be switching to user mode.
	 * Must enable interrupts after sret.
	 */

	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);

	userret(l);
}

/*
 * Process the tail end of a posix_spawn() for the child.
 */
void
cpu_spawn_return(struct lwp *l)
{
	userret(l);
}

/*
 * Start a new LWP
 */
void
startlwp(void *arg)
{
	ucontext_t * const uc = arg;
	lwp_t * const l = curlwp;
	int error __diagused;

	error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
	KASSERT(error == 0);

	kmem_free(uc, sizeof(*uc));
	userret(l);
}

// We've worked hard to make sure struct reg and __gregset_t are the same.
// Ditto for struct fpreg and fregset_t.

#ifdef _LP64
CTASSERT(sizeof(struct reg) == sizeof(__gregset_t));
#endif
CTASSERT(sizeof(struct fpreg) == sizeof(__fregset_t));

void
cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
{
	const struct trapframe * const tf = l->l_md.md_utf;

	/* Save register context. */
	*(struct reg *)mcp->__gregs = tf->tf_regs;

	*flags |= _UC_CPU | _UC_TLSBASE;

	/* Save floating point register context, if any. */
	KASSERT(l == curlwp);
	if (fpu_valid_p(l)) {
		/*
		 * If this process is the current FP owner, dump its
		 * context to the PCB first.
		 */
		fpu_save(l);

		struct pcb * const pcb = lwp_getpcb(l);
		*(struct fpreg *)mcp->__fregs = pcb->pcb_fpregs;
		*flags |= _UC_FPU;
	}
}

int
cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
{
	/*
	 * Verify that at least the PC and SP are user addresses.
	 */
	if ((intptr_t) mcp->__gregs[_REG_PC] < 0
	    || (intptr_t) mcp->__gregs[_REG_SP] < 0
	    || (mcp->__gregs[_REG_PC] & 1))
		return EINVAL;

	return 0;
}

int
cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
{
	struct trapframe * const tf = l->l_md.md_utf;
	struct proc * const p = l->l_proc;
	const __greg_t * const gr = mcp->__gregs;
	int error;

	/* Restore register context, if any. */
	if (flags & _UC_CPU) {
		error = cpu_mcontext_validate(l, mcp);
		if (error)
			return error;

		/*
		 * Avoid updating the TLS register here; it is restored
		 * separately below when _UC_TLSBASE is set.
		 */
		const __greg_t saved_tp = tf->tf_reg[_REG_TP];
		tf->tf_regs = *(const struct reg *)gr;
		tf->tf_reg[_REG_TP] = saved_tp;
	}

	/* Restore the private thread context */
	if (flags & _UC_TLSBASE) {
		lwp_setprivate(l, (void *)(intptr_t)mcp->__gregs[_X_TP]);
	}

	/* Restore floating point register context, if any. */
	if (flags & _UC_FPU) {
		KASSERT(l == curlwp);
		/* Tell PCU we are replacing the FPU contents. */
		fpu_replace(l);

		/*
		 * The PCB FP regs struct includes the FP CSR, so use the
		 * proper size of fpreg when copying.
		 */
		struct pcb * const pcb = lwp_getpcb(l);
		pcb->pcb_fpregs = *(const struct fpreg *)mcp->__fregs;
	}

	mutex_enter(p->p_lock);
	if (flags & _UC_SETSTACK)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
	if (flags & _UC_CLRSTACK)
		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
	mutex_exit(p->p_lock);

	return 0;
}

void
cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
{
	KASSERT(kpreempt_disabled());

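	/*
	 * RESCHED_KPREEMPT is delivered with a soft interrupt (or an IPI
	 * for a remote CPU) when kernel preemption is configured; an
	 * ordinary reschedule just posts an AST on the target LWP.
	 */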
	if ((flags & RESCHED_KPREEMPT) != 0) {
#ifdef __HAVE_PREEMPTION
		if ((flags & RESCHED_REMOTE) != 0) {
			cpu_send_ipi(ci, IPI_KPREEMPT);
		} else {
			softint_trigger(SOFTINT_KPREEMPT);
		}
#endif
		return;
	}
	if ((flags & RESCHED_REMOTE) != 0) {
#ifdef MULTIPROCESSOR
		cpu_send_ipi(ci, IPI_AST);
#endif
	} else {
		l->l_md.md_astpending = 1;	/* force call to ast() */
	}
}

void
cpu_signotify(struct lwp *l)
{
	KASSERT(kpreempt_disabled());
#ifdef __HAVE_FAST_SOFTINTS
	KASSERT(lwp_locked(l, NULL));
#endif

	if (l->l_cpu != curcpu()) {
#ifdef MULTIPROCESSOR
		cpu_send_ipi(l->l_cpu, IPI_AST);
#endif
	} else {
		l->l_md.md_astpending = 1;	/* force call to ast() */
	}
}

void
cpu_need_proftick(struct lwp *l)
{
	KASSERT(kpreempt_disabled());
	KASSERT(l->l_cpu == curcpu());

	l->l_pflag |= LP_OWEUPC;
	l->l_md.md_astpending = 1;	/* force call to ast() */
}


/* Sync the discs, unmount the filesystems, and adjust the todr */
static void
bootsync(void)
{
	static bool bootsyncdone = false;

	if (bootsyncdone)
		return;

	bootsyncdone = true;

	/* Make sure we can still manage to do things */
	if ((csr_sstatus_read() & SR_SIE) == 0) {
		/*
		 * If we get here then boot has been called without RB_NOSYNC
		 * and interrupts were disabled. This means the boot() call
		 * did not come from a user process e.g. shutdown, but must
		 * have come from somewhere in the kernel.
		 */
		ENABLE_INTERRUPTS();
		printf("Warning: interrupts disabled during boot()\n");
	}

	vfs_shutdown();
}


void
cpu_reboot(int howto, char *bootstr)
{

	/*
	 * If RB_NOSYNC was not specified sync the discs.
	 * Note: Unless cold is set to 1 here, syslogd will die during the
	 * unmount. It looks like syslogd is getting woken up only to find
	 * that it cannot page part of the binary in as the filesystem has
	 * been unmounted.
	 */
	if ((howto & RB_NOSYNC) == 0)
		bootsync();

#if 0
	/* Disable interrupts. */
	const int s = splhigh();

	/* Do a dump if requested. */
	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
		dumpsys();

	splx(s);
#endif

	pmf_system_shutdown(boothowto);

	/* Say NO to interrupts for good */
	splhigh();

	/* Run any shutdown hooks */
	doshutdownhooks();

	/* Make sure IRQ's are disabled */
	DISABLE_INTERRUPTS();

	if (howto & RB_HALT) {
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		cnpollc(true);	/* for proper keyboard command handling */
		if (cngetc() == 0) {
			/* no console attached, so just spin */
			printf("No keyboard - cannot reboot after all.\n");
			goto spin;
		}
		cnpollc(false);
	}

	printf("rebooting...\n");

	sbi_system_reset(SBI_RESET_TYPE_COLDREBOOT, SBI_RESET_REASON_NONE);
spin:
	for (;;) {
		asm volatile("wfi" ::: "memory");
	}
	/* NOTREACHED */
}

void
cpu_dumpconf(void)
{
	// TBD!!
}


int
cpu_lwp_setprivate(lwp_t *l, void *addr)
{
	struct trapframe * const tf = lwp_trapframe(l);

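	/* The TLS base lives in the tp register of the user trapframe. */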
	tf->tf_reg[_REG_TP] = (register_t)addr;

	return 0;
}


void
cpu_startup(void)
{
	vaddr_t minaddr, maxaddr;
	char pbuf[10];	/* "999999 MB" -- But Sv39 is max 512GB */

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	printf("%s%s", copyright, version);
	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
	printf("total memory = %s\n", pbuf);

	minaddr = 0;
	/*
	 * Allocate a submap for physio.
	 */
	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
	    VM_PHYS_SIZE, 0, FALSE, NULL);

	format_bytes(pbuf, sizeof(pbuf), ptoa(uvm_availmem(false)));
	printf("avail memory = %s\n", pbuf);

#ifdef MULTIPROCESSOR
	kcpuset_create(&cpus_halted, true);
	KASSERT(cpus_halted != NULL);

	kcpuset_create(&cpus_hatched, true);
	KASSERT(cpus_hatched != NULL);

	kcpuset_create(&cpus_paused, true);
	KASSERT(cpus_paused != NULL);

	kcpuset_create(&cpus_resumed, true);
	KASSERT(cpus_resumed != NULL);

	kcpuset_create(&cpus_running, true);
	KASSERT(cpus_running != NULL);

	kcpuset_set(cpus_hatched, cpu_index(curcpu()));
	kcpuset_set(cpus_running, cpu_index(curcpu()));
#endif

	fdtbus_intr_init();

	fdt_setup_rndseed();
	fdt_setup_efirng();
}

static void
riscv_add_memory(const struct fdt_memory *m, void *arg)
{
	paddr_t first = atop(m->start);
	paddr_t last = atop(m->end);
	int freelist = VM_FREELIST_DEFAULT;

	VPRINTF("adding %#16" PRIxPADDR " - %#16" PRIxPADDR" to freelist %d\n",
	    m->start, m->end, freelist);

	uvm_page_physload(first, last, first, last, freelist);
	physmem += last - first;
}


static void
cpu_kernel_vm_init(paddr_t memory_start, paddr_t memory_end)
{
	extern char __kernel_text[];
	extern char _end[];

	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
	vaddr_t kernend = round_page((vaddr_t)_end);
	paddr_t kernstart_phys = KERN_VTOPHYS(kernstart);
	paddr_t kernend_phys = KERN_VTOPHYS(kernend);

	VPRINTF("%s: kernel phys start %#" PRIxPADDR " end %#" PRIxPADDR "\n",
	    __func__, kernstart_phys, kernend_phys);
	fdt_memory_remove_range(kernstart_phys,
	    kernend_phys - kernstart_phys);

#if 0
	/*
	 * Don't give these pages to UVM.
	 *
	 * cpu_kernel_vm_init needs to create proper tables; then the
	 * following will be true.
	 *
	 * Once the APs have started, the pages used for stacks and the
	 * L1PT can be given to uvm.
	 */
	extern char const __start__init_memory[];
	extern char const __stop__init_memory[] __weak;
	if (&__start__init_memory[0] != &__stop__init_memory[0]) {
		const paddr_t spa = KERN_VTOPHYS((vaddr_t)__start__init_memory);
		const paddr_t epa = KERN_VTOPHYS((vaddr_t)__stop__init_memory);

		VPRINTF("%s: init phys start %#" PRIxPADDR
		    " end %#" PRIxPADDR "\n", __func__, spa, epa);
		fdt_memory_remove_range(spa, epa - spa);
	}
#endif

#ifdef _LP64
	paddr_t pa = memory_start & ~XSEGOFSET;
	pmap_direct_base = RISCV_DIRECTMAP_START;
	extern pd_entry_t l2_pte[PAGE_SIZE / sizeof(pd_entry_t)];


	const vsize_t vshift = XSEGSHIFT;
	const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
	const vsize_t inc = 1UL << vshift;

	const vaddr_t sva = RISCV_DIRECTMAP_START + pa;
	const vaddr_t eva = RISCV_DIRECTMAP_END;
	const size_t sidx = (sva >> vshift) & pdetab_mask;
	const size_t eidx = (eva >> vshift) & pdetab_mask;

	/* Allocate gigapages covering all physical memory in the direct map. */
	for (size_t i = sidx; i < eidx && pa < memory_end; i++, pa += inc) {
		l2_pte[i] = PA_TO_PTE(pa) | PTE_KERN | PTE_HARDWIRED | PTE_RW;
		VPRINTF("dm: %p : %#" PRIxPADDR "\n", &l2_pte[i], l2_pte[i]);
	}
#endif
	// pt_dump(printf);
}

static void
riscv_init_lwp0_uarea(void)
{
	extern char lwp0uspace[];

	uvm_lwp_setuarea(&lwp0, (vaddr_t)lwp0uspace);
	memset(&lwp0.l_md, 0, sizeof(lwp0.l_md));
	memset(lwp_getpcb(&lwp0), 0, sizeof(struct pcb));

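	/* Carve the initial trapframe out of the top of lwp0's uarea. */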
	struct trapframe *tf = (struct trapframe *)(lwp0uspace + USPACE) - 1;
	memset(tf, 0, sizeof(*tf));

	lwp0.l_md.md_utf = lwp0.l_md.md_ktf = tf;
}


static void
riscv_print_memory(const struct fdt_memory *m, void *arg)
{

	VPRINTF("FDT /memory @ 0x%" PRIx64 " size 0x%" PRIx64 "\n",
	    m->start, m->end - m->start);
}


static void
parse_mi_bootargs(char *args)
{
	int howto;
	bool found, start, skipping;

	if (args == NULL)
		return;

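	/*
	 * Scan space-separated words: only words beginning with '-' are
	 * treated as boot flag strings, everything else is skipped.
	 * A "root=..." option, if present, selects the boot device below.
	 */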
	start = true;
	skipping = false;
	for (char *cp = args; *cp; cp++) {
		/* check for "words" starting with a "-" only */
		if (start) {
			if (*cp == '-') {
				skipping = false;
			} else {
				skipping = true;
			}
			start = false;
			continue;
		}

		if (*cp == ' ') {
			start = true;
			skipping = false;
			continue;
		}

		if (skipping) {
			continue;
		}

		/* Check valid boot flags */
		howto = 0;
		BOOT_FLAG(*cp, howto);
		if (!howto)
			printf("bootflag '%c' not recognised\n", *cp);
		else
			boothowto |= howto;
	}

	found = optstr_get(args, "root", bootdevstr, sizeof(bootdevstr));
	if (found) {
		bootspec = bootdevstr;
	}
}


void
init_riscv(register_t hartid, paddr_t dtb)
{

	/* Set temporarily so printf()/panic() work even before consinit(). */
	cn_tab = &earlycons;

	/* Load FDT */
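	/*
	 * The DTB was mapped into the VM_KERNEL_DTB_BASE window by the
	 * early bootstrap code, so its virtual address can be recovered
	 * from the physical offset within that segment.
	 */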
	const vaddr_t dtbva = VM_KERNEL_DTB_BASE + (dtb & (NBSEG - 1));
	void *fdt_data = (void *)dtbva;
	int error = fdt_check_header(fdt_data);
	if (error != 0)
		panic("fdt_check_header failed: %s", fdt_strerror(error));

	fdtbus_init(fdt_data);

	/* Lookup platform specific backend */
	const struct fdt_platform * const plat = fdt_platform_find();
	if (plat == NULL)
		panic("Kernel does not support this device");

	/* Early console may be available, announce ourselves. */
	VPRINTF("FDT<%p>\n", fdt_data);

	boot_args = fdt_get_bootargs();

	VPRINTF("devmap %p\n", plat->fp_devmap());
	pmap_devmap_bootstrap(0, plat->fp_devmap());

	VPRINTF("bootstrap\n");
	plat->fp_bootstrap();

	/*
	 * If stdout-path is specified on the command line, override the
	 * value in /chosen/stdout-path before initializing console.
	 */
	VPRINTF("stdout\n");
	fdt_update_stdout_path(fdt_data, boot_args);

	/*
	 * Done making changes to the FDT.
	 */
	fdt_pack(fdt_data);

	const uint32_t dtbsize = round_page(fdt_totalsize(fdt_data));

	VPRINTF("fdt size %x/%x\n", dtbsize, fdt_totalsize(fdt_data));

	VPRINTF("consinit ");
	consinit();
	VPRINTF("ok\n");

	/* Talk to the user */
	printf("NetBSD/riscv (fdt) booting ...\n");

#ifdef BOOT_ARGS
	char mi_bootargs[] = BOOT_ARGS;
	parse_mi_bootargs(mi_bootargs);
#endif

	uint64_t memory_start, memory_end;
	fdt_memory_get(&memory_start, &memory_end);
	physical_start = memory_start;
	physical_end = memory_end;

	fdt_memory_foreach(riscv_print_memory, NULL);

	/* Cannot map memory above largest page number */
	const uint64_t maxppn = __SHIFTOUT_MASK(PTE_PPN) - 1;
	const uint64_t memory_limit = ptoa(maxppn);

	if (memory_end > memory_limit) {
		fdt_memory_remove_range(memory_limit, memory_end - memory_limit);
		memory_end = memory_limit;
	}

	uint64_t memory_size __unused = memory_end - memory_start;

	VPRINTF("%s: memory start %" PRIx64 " end %" PRIx64 " (len %"
	    PRIx64 ")\n", __func__, memory_start, memory_end, memory_size);

	/* Parse ramdisk, rndseed, and firmware's RNG from EFI */
	fdt_probe_initrd();
	fdt_probe_rndseed();
	fdt_probe_efirng();

	fdt_memory_remove_reserved(memory_start, memory_end);

	fdt_memory_remove_range(dtb, dtbsize);
	fdt_reserve_initrd();
	fdt_reserve_rndseed();
	fdt_reserve_efirng();

	/* Perform PT build and VM init */
	cpu_kernel_vm_init(memory_start, memory_end);

	VPRINTF("bootargs: %s\n", boot_args);

	parse_mi_bootargs(boot_args);

#ifdef DDB
	if (boothowto & RB_KDB) {
		printf("Entering DDB...\n");
		cpu_Debugger();
	}
#endif

	extern char __kernel_text[];
	extern char _end[];
//	extern char __data_start[];
//	extern char __rodata_start[];

	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
	vaddr_t kernend = round_page((vaddr_t)_end);
	paddr_t kernstart_phys __unused = KERN_VTOPHYS(kernstart);
	paddr_t kernend_phys __unused = KERN_VTOPHYS(kernend);

	vaddr_t kernelvmstart;

	vaddr_t kernstart_mega __unused = MEGAPAGE_TRUNC(kernstart);
	vaddr_t kernend_mega = MEGAPAGE_ROUND(kernend);

	kernelvmstart = kernend_mega;

#if 0
#ifdef MODULAR
#define MODULE_RESERVED_MAX	(1024 * 1024 * 128)
#define MODULE_RESERVED_SIZE	(1024 * 1024 * 32)	/* good enough? */
	module_start = kernelvmstart;
	module_end = kernend_mega + MODULE_RESERVED_SIZE;
	if (module_end >= kernstart_mega + MODULE_RESERVED_MAX)
		module_end = kernstart_mega + MODULE_RESERVED_MAX;
	KASSERT(module_end > kernend_mega);
	kernelvmstart = module_end;
#endif /* MODULAR */
#endif
	KASSERT(kernelvmstart < VM_KERNEL_VM_BASE);

	kernelvmstart = VM_KERNEL_VM_BASE;

	/*
	 * msgbuf is allocated from the top of the last (biggest) memory
	 * block.
	 */
	paddr_t msgbufaddr = 0;

#ifdef _LP64
	/* XXX check all ranges for last one with a big enough hole */
	msgbufaddr = memory_end - MSGBUFSIZE;
	KASSERT(msgbufaddr != 0);	/* no space for msgbuf */
	fdt_memory_remove_range(msgbufaddr, MSGBUFSIZE);
	msgbufaddr = RISCV_PA_TO_KVA(msgbufaddr);
	VPRINTF("msgbufaddr = %#lx\n", msgbufaddr);
	initmsgbuf((void *)msgbufaddr, MSGBUFSIZE);
#endif

#define DPRINTF(v) VPRINTF("%24s = 0x%16lx\n", #v, (unsigned long)v);

	VPRINTF("------------------------------------------\n");
	DPRINTF(kern_vtopdiff);
	DPRINTF(memory_start);
	DPRINTF(memory_end);
	DPRINTF(memory_size);
	DPRINTF(kernstart_phys);
	DPRINTF(kernend_phys)
	DPRINTF(msgbufaddr);
//	DPRINTF(physical_end);
	DPRINTF(VM_MIN_KERNEL_ADDRESS);
	DPRINTF(kernstart_mega);
	DPRINTF(kernstart);
	DPRINTF(kernend);
	DPRINTF(kernend_mega);
#if 0
#ifdef MODULAR
	DPRINTF(module_start);
	DPRINTF(module_end);
#endif
#endif
	DPRINTF(VM_MAX_KERNEL_ADDRESS);
#ifdef _LP64
	DPRINTF(pmap_direct_base);
#endif
	VPRINTF("------------------------------------------\n");

#undef DPRINTF

	uvm_md_init();

	/*
	 * pass memory pages to uvm
	 */
	physmem = 0;
	fdt_memory_foreach(riscv_add_memory, NULL);

	pmap_bootstrap(kernelvmstart, VM_MAX_KERNEL_ADDRESS);

	kasan_init();

	/* Finish setting up lwp0 on our end before we call main() */
	riscv_init_lwp0_uarea();


	error = 0;
	if ((boothowto & RB_MD1) == 0) {
		VPRINTF("mpstart\n");
		if (plat->fp_mpstart)
			error = plat->fp_mpstart();
	}
	if (error)
		printf("AP startup problems\n");
}


#ifdef __HAVE_MM_MD_KERNACC

#define IN_RANGE_P(addr, start, end)	(((start) <= (addr)) && ((addr) < (end)))
#ifdef _LP64
#define IN_DIRECTMAP_P(va) \
	IN_RANGE_P(va, RISCV_DIRECTMAP_START, RISCV_DIRECTMAP_END)
#else
#define IN_DIRECTMAP_P(va) false
#endif

int
mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled)
{
	extern char __kernel_text[];
	extern char _end[];
	extern char __data_start[];

	const vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
	const vaddr_t kernend = round_page((vaddr_t)_end);
	const vaddr_t data_start = (vaddr_t)__data_start;

	const vaddr_t va = (vaddr_t)ptr;

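	/*
	 * Within the kernel image, anything below __data_start (text and
	 * read-only data) may be read but not written; direct-map
	 * addresses are allowed as-is.
	 */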
	*handled = false;
	if (IN_RANGE_P(va, kernstart, kernend)) {
		*handled = true;
		if (va < data_start && (prot & VM_PROT_WRITE) != 0) {
			return EFAULT;
		}
	} else if (IN_DIRECTMAP_P(va)) {
		*handled = true;
	}

	return 0;
}
#endif


#ifdef _LP64
static void
pte_bits(void (*pr)(const char *, ...), pt_entry_t pte)
{
	(*pr)("%c%c%c%c%c%c%c%c",
	    (pte & PTE_D) ? 'D' : '.',
	    (pte & PTE_A) ? 'A' : '.',
	    (pte & PTE_G) ? 'G' : '.',
	    (pte & PTE_U) ? 'U' : '.',
	    (pte & PTE_X) ? 'X' : '.',
	    (pte & PTE_W) ? 'W' : '.',
	    (pte & PTE_R) ? 'R' : '.',
	    (pte & PTE_V) ? 'V' : '.');
}

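/*
 * Recursively dump one page-table page. "topbit" is non-zero only for
 * the root table and is used to sign-extend virtual addresses, since
 * Sv39/Sv48 require the bits above the top VA bit to equal that bit.
 */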
static void
dump_ln_table(paddr_t pdp_pa, int topbit, int level, vaddr_t va,
    void (*pr)(const char *, ...) __printflike(1, 2))
{
	pd_entry_t *pdp = (void *)PMAP_DIRECT_MAP(pdp_pa);

	(*pr)("l%u @ pa %#16" PRIxREGISTER "\n", level, pdp_pa);
	for (size_t i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
		pd_entry_t entry = pdp[i];

		if (topbit) {
			va = i << (PGSHIFT + level * SEGLENGTH);
			if (va & __BIT(topbit)) {
				va |= __BITS(63, topbit);
			}
		}
		if (entry != 0) {
			paddr_t pa = __SHIFTOUT(entry, PTE_PPN) << PGSHIFT;
			// check level PPN bits.
			if (PTE_ISLEAF_P(entry)) {
				(*pr)("l%u %3zu va 0x%016lx pa 0x%012lx - ",
				    level, i, va, pa);
				pte_bits(pr, entry);
				(*pr)("\n");
			} else {
				(*pr)("l%u %3zu va 0x%016lx -> 0x%012lx - ",
				    level, i, va, pa);
				pte_bits(pr, entry);
				(*pr)("\n");
				if (level == 0) {
					(*pr)("wtf\n");
					continue;
				}
				if (pte_pde_valid_p(entry))
					dump_ln_table(pa, 0, level - 1, va, pr);
			}
		}
		va += 1UL << (PGSHIFT + level * SEGLENGTH);
	}
}

void
pt_dump(void (*pr)(const char *, ...) __printflike(1, 2))
{
	const register_t satp = csr_satp_read();
	size_t topbit = sizeof(long) * NBBY - 1;

#ifdef _LP64
	const paddr_t satp_pa = __SHIFTOUT(satp, SATP_PPN) << PGSHIFT;
	const uint8_t mode = __SHIFTOUT(satp, SATP_MODE);
	u_int level = 1;

	switch (mode) {
	case SATP_MODE_SV39:
	case SATP_MODE_SV48:
		topbit = (39 - 1) + (mode - 8) * SEGLENGTH;
		level = mode - 6;
		break;
	}
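	/* Sv39: top VA bit 38, root table at level 2; Sv48: bit 47, level 3. */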
#endif
	(*pr)("topbit = %zu\n", topbit);

	(*pr)("satp = 0x%" PRIxREGISTER "\n", satp);
#ifdef _LP64
	dump_ln_table(satp_pa, topbit, level, 0, pr);
#endif
}
#endif

void
consinit(void)
{
	static bool initialized = false;
	const struct fdt_console *cons = fdtbus_get_console();
	const struct fdt_platform *plat = fdt_platform_find();

	if (initialized || cons == NULL)
		return;

	u_int uart_freq = 0;
	extern struct bus_space riscv_generic_bs_tag;
	struct fdt_attach_args faa = {
		.faa_bst = &riscv_generic_bs_tag,
	};

	faa.faa_phandle = fdtbus_get_stdout_phandle();
	if (plat->fp_uart_freq != NULL)
		uart_freq = plat->fp_uart_freq();

	cons->consinit(&faa, uart_freq);

	initialized = true;
}
