riscv_machdep.c revision 1.26 1 /* $NetBSD: riscv_machdep.c,v 1.26 2023/05/07 12:41:49 skrll Exp $ */
2
3 /*-
4 * Copyright (c) 2014, 2019, 2022 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matt Thomas of 3am Software Foundry, and by Nick Hudson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "opt_ddb.h"
33 #include "opt_modular.h"
34 #include "opt_riscv_debug.h"
35
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: riscv_machdep.c,v 1.26 2023/05/07 12:41:49 skrll Exp $");
38
39 #include <sys/param.h>
40
41 #include <sys/asan.h>
42 #include <sys/boot_flag.h>
43 #include <sys/cpu.h>
44 #include <sys/exec.h>
45 #include <sys/kmem.h>
46 #include <sys/ktrace.h>
47 #include <sys/lwp.h>
48 #include <sys/module.h>
49 #include <sys/mount.h>
50 #include <sys/msgbuf.h>
51 #include <sys/optstr.h>
52 #include <sys/proc.h>
53 #include <sys/reboot.h>
54 #include <sys/syscall.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57
58 #include <dev/cons.h>
59 #include <uvm/uvm_extern.h>
60
61 #include <riscv/frame.h>
62 #include <riscv/locore.h>
63 #include <riscv/machdep.h>
64 #include <riscv/pte.h>
65 #include <riscv/sbi.h>
66
67 #include <libfdt.h>
68 #include <dev/fdt/fdtvar.h>
69 #include <dev/fdt/fdt_boot.h>
70 #include <dev/fdt/fdt_memory.h>
71 #include <dev/fdt/fdt_private.h>
72
73 int cpu_printfataltraps = 1;
74 char machine[] = MACHINE;
75 char machine_arch[] = MACHINE_ARCH;
76
77 #ifdef VERBOSE_INIT_RISCV
78 #define VPRINTF(...) printf(__VA_ARGS__)
79 #else
80 #define VPRINTF(...) __nothing
81 #endif
82
83 #ifndef FDT_MAX_BOOT_STRING
84 #define FDT_MAX_BOOT_STRING 1024
85 #endif
86 /* 64 should be enough, even for a ZFS UUID */
87 #define MAX_BOOT_DEV_STR 64
88
89 char bootargs[FDT_MAX_BOOT_STRING] = "";
90 char bootdevstr[MAX_BOOT_DEV_STR] = "";
91 char *boot_args = NULL;
92
93 paddr_t physical_start;
94 paddr_t physical_end;
95
96 static void
97 earlyconsputc(dev_t dev, int c)
98 {
99 uartputc(c);
100 }
101
102 static int
103 earlyconsgetc(dev_t dev)
104 {
105 return uartgetc();
106 }
107
108 static struct consdev earlycons = {
109 .cn_putc = earlyconsputc,
110 .cn_getc = earlyconsgetc,
111 .cn_pollc = nullcnpollc,
112 };
113
114 struct vm_map *phys_map;
115
116 struct trapframe cpu_ddb_regs;
117 const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
118 #ifdef FPE
119 [PCU_FPU] = &pcu_fpu_ops,
120 #endif
121 };
122
/*
 * Used by PHYSTOV and VTOPHYS -- will be set before the BSS is zeroed,
 * so keep it in .data
 */
127 unsigned long kern_vtopdiff __attribute__((__section__(".data")));
128
129
130 /*
131 * machine dependent system variables.
132 */
SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{
	/* Create the permanent top-level "machdep" sysctl node. */
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_EOL);
}
141
142 void
143 delay(unsigned long us)
144 {
145 const uint32_t cycles_per_us = curcpu()->ci_data.cpu_cc_freq / 1000000;
146 const uint64_t cycles = (uint64_t)us * cycles_per_us;
147 const uint64_t finish = csr_cycle_read() + cycles;
148
149 while (csr_cycle_read() < finish) {
150 /* spin, baby spin */
151 }
152 }
153
154 #ifdef MODULAR
155 /*
156 * Push any modules loaded by the boot loader.
157 */
void
module_init_md(void)
{
	/* No boot-loader-provided modules to push on riscv yet. */
}
162 #endif /* MODULAR */
163
164 /*
165 * Set registers on exec.
166 * Clear all registers except sp, pc.
167 * sp is set to the stack pointer passed in. pc is set to the entry
168 * point given by the exec_package passed in.
169 */
170 void
171 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
172 {
173 struct trapframe * const tf = l->l_md.md_utf;
174 struct proc * const p = l->l_proc;
175
176 memset(tf, 0, sizeof(*tf));
177 tf->tf_sp = (intptr_t)stack_align(stack);
178 tf->tf_pc = (intptr_t)pack->ep_entry & ~1;
179 #ifdef _LP64
180 tf->tf_sr = (p->p_flag & PK_32) ? SR_USER32 : SR_USER64;
181 #else
182 tf->tf_sr = SR_USER;
183 #endif
184
185 // Set up arguments for ___start(cleanup, ps_strings)
186 tf->tf_a0 = 0; // cleanup
187 tf->tf_a1 = p->p_psstrp; // ps_strings
188
189 /*
190 * Must have interrupts disabled for exception return.
191 * Must be switching to user mode.
192 * Must enable interrupts after sret.
193 */
194 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
195 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
196 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
197 }
198
199 void
200 md_child_return(struct lwp *l)
201 {
202 struct trapframe * const tf = lwp_trapframe(l);
203
204 tf->tf_a0 = 0;
205 tf->tf_a1 = 1;
206 #ifdef FPE
207 /* Disable FP as we can't be using it (yet). */
208 tf->tf_sr &= ~SR_FS;
209 #endif
210
211 /*
212 * Must have interrupts disabled for exception return.
213 * Must be switching to user mode.
214 * Must enable interrupts after sret.
215 */
216
217 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
218 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
219 KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
220
221 userret(l);
222 }
223
/* Return to user mode in a posix_spawn()ed process. */
void
cpu_spawn_return(struct lwp *l)
{
	userret(l);
}
229
230 /*
231 * Start a new LWP
232 */
233 void
234 startlwp(void *arg)
235 {
236 ucontext_t * const uc = arg;
237 lwp_t * const l = curlwp;
238 int error __diagused;
239
240 error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
241 KASSERT(error == 0);
242
243 kmem_free(uc, sizeof(*uc));
244 userret(l);
245 }
246
247 // We've worked hard to make sure struct reg and __gregset_t are the same.
248 // Ditto for struct fpreg and fregset_t.
249
250 #ifdef _LP64
251 CTASSERT(sizeof(struct reg) == sizeof(__gregset_t));
252 #endif
253 CTASSERT(sizeof(struct fpreg) == sizeof(__fregset_t));
254
255 void
256 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
257 {
258 const struct trapframe * const tf = l->l_md.md_utf;
259
260 /* Save register context. */
261 *(struct reg *)mcp->__gregs = tf->tf_regs;
262
263 *flags |= _UC_CPU | _UC_TLSBASE;
264
265 /* Save floating point register context, if any. */
266 KASSERT(l == curlwp);
267 if (fpu_valid_p(l)) {
268 /*
269 * If this process is the current FP owner, dump its
270 * context to the PCB first.
271 */
272 fpu_save(l);
273
274 struct pcb * const pcb = lwp_getpcb(l);
275 *(struct fpreg *)mcp->__fregs = pcb->pcb_fpregs;
276 *flags |= _UC_FPU;
277 }
278 }
279
280 int
281 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
282 {
283 /*
284 * Verify that at least the PC and SP are user addresses.
285 */
286 if ((intptr_t) mcp->__gregs[_REG_PC] < 0
287 || (intptr_t) mcp->__gregs[_REG_SP] < 0
288 || (mcp->__gregs[_REG_PC] & 1))
289 return EINVAL;
290
291 return 0;
292 }
293
294 int
295 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
296 {
297 struct trapframe * const tf = l->l_md.md_utf;
298 struct proc * const p = l->l_proc;
299 const __greg_t * const gr = mcp->__gregs;
300 int error;
301
302 /* Restore register context, if any. */
303 if (flags & _UC_CPU) {
304 error = cpu_mcontext_validate(l, mcp);
305 if (error)
306 return error;
307
308 /* Save register context. */
309 tf->tf_regs = *(const struct reg *)gr;
310 }
311
312 /* Restore the private thread context */
313 if (flags & _UC_TLSBASE) {
314 lwp_setprivate(l, (void *)(intptr_t)mcp->__gregs[_X_TP]);
315 }
316
317 /* Restore floating point register context, if any. */
318 if (flags & _UC_FPU) {
319 KASSERT(l == curlwp);
320 /* Tell PCU we are replacing the FPU contents. */
321 fpu_replace(l);
322
323 /*
324 * The PCB FP regs struct includes the FP CSR, so use the
325 * proper size of fpreg when copying.
326 */
327 struct pcb * const pcb = lwp_getpcb(l);
328 pcb->pcb_fpregs = *(const struct fpreg *)mcp->__fregs;
329 }
330
331 mutex_enter(p->p_lock);
332 if (flags & _UC_SETSTACK)
333 l->l_sigstk.ss_flags |= SS_ONSTACK;
334 if (flags & _UC_CLRSTACK)
335 l->l_sigstk.ss_flags &= ~SS_ONSTACK;
336 mutex_exit(p->p_lock);
337
338 return 0;
339 }
340
/*
 * Ask the CPU `ci` (running `l`) to reschedule.  Kernel preemption
 * requests are delivered by IPI or softint; ordinary requests set the
 * per-LWP AST flag (remotely via IPI on MULTIPROCESSOR kernels).
 */
void
cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
{
	KASSERT(kpreempt_disabled());

	if ((flags & RESCHED_KPREEMPT) != 0) {
#ifdef __HAVE_PREEMPTION
		if ((flags & RESCHED_REMOTE) != 0) {
			cpu_send_ipi(ci, IPI_KPREEMPT);
		} else {
			softint_trigger(SOFTINT_KPREEMPT);
		}
#endif
		/* Without __HAVE_PREEMPTION a kpreempt request is a no-op. */
		return;
	}
	if ((flags & RESCHED_REMOTE) != 0) {
#ifdef MULTIPROCESSOR
		cpu_send_ipi(ci, IPI_AST);
#endif
	} else {
		l->l_md.md_astpending = 1;	/* force call to ast() */
	}
}
364
365 void
366 cpu_signotify(struct lwp *l)
367 {
368 KASSERT(kpreempt_disabled());
369 #ifdef __HAVE_FAST_SOFTINTS
370 KASSERT(lwp_locked(l, NULL));
371 #endif
372
373 if (l->l_cpu != curcpu()) {
374 #ifdef MULTIPROCESSOR
375 cpu_send_ipi(ci, IPI_AST);
376 #endif
377 } else {
378 l->l_md.md_astpending = 1; /* force call to ast() */
379 }
380 }
381
382
/*
 * Arrange for the current LWP to be charged a profiling tick: mark it
 * as owing a profil(2) update and force an AST on return to user mode.
 */
void
cpu_need_proftick(struct lwp *l)
{
	KASSERT(kpreempt_disabled());
	KASSERT(l->l_cpu == curcpu());

	l->l_pflag |= LP_OWEUPC;
	l->l_md.md_astpending = 1;	/* force call to ast() */
}
392
393
394 /* Sync the discs, unmount the filesystems, and adjust the todr */
395 static void
396 bootsync(void)
397 {
398 static bool bootsyncdone = false;
399
400 if (bootsyncdone)
401 return;
402
403 bootsyncdone = true;
404
405 /* Make sure we can still manage to do things */
406 if ((csr_sstatus_read() & SR_SIE) == 0) {
407 /*
408 * If we get here then boot has been called without RB_NOSYNC
409 * and interrupts were disabled. This means the boot() call
410 * did not come from a user process e.g. shutdown, but must
411 * have come from somewhere in the kernel.
412 */
413 ENABLE_INTERRUPTS();
414 printf("Warning interrupts disabled during boot()\n");
415 }
416
417 vfs_shutdown();
418
419 resettodr();
420 }
421
422
/*
 * Reboot (or halt) the machine.  Syncs filesystems unless RB_NOSYNC,
 * runs shutdown hooks, then asks SBI for a cold reboot; if that ever
 * returns we park the hart in wfi forever.
 */
void
cpu_reboot(int howto, char *bootstr)
{

	/*
	 * If RB_NOSYNC was not specified sync the discs.
	 * Note: Unless cold is set to 1 here, syslogd will die during the
	 * unmount.  It looks like syslogd is getting woken up only to find
	 * that it cannot page part of the binary in as the filesystem has
	 * been unmounted.
	 */
	if ((howto & RB_NOSYNC) == 0)
		bootsync();

#if 0
	/* Disable interrupts. */
	const int s = splhigh();

	/* Do a dump if requested. */
	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
		dumpsys();

	splx(s);
#endif

	pmf_system_shutdown(boothowto);

	/* Say NO to interrupts for good */
	splhigh();

	/* Run any shutdown hooks */
	doshutdownhooks();

	/* Make sure IRQ's are disabled */
	DISABLE_INTERRUPTS();

	sbi_system_reset(SBI_RESET_TYPE_COLDREBOOT, SBI_RESET_REASON_NONE);

	/* SBI reset did not take effect; idle forever. */
	for (;;) {
		asm volatile("wfi" ::: "memory");
	}
	/* NOTREACHED */
}
466
/* Configure the kernel crash-dump area.  Not implemented yet. */
void
cpu_dumpconf(void)
{
	// TBD!!
}
472
473
474 int
475 cpu_lwp_setprivate(lwp_t *l, void *addr)
476 {
477 struct trapframe * const tf = lwp_trapframe(l);
478
479 tf->tf_reg[_REG_TP] = (register_t)addr;
480
481 return 0;
482 }
483
484
485 void
486 cpu_startup(void)
487 {
488 vaddr_t minaddr, maxaddr;
489 char pbuf[10]; /* "999999 MB" -- But Sv39 is max 512GB */
490
491 /*
492 * Good {morning,afternoon,evening,night}.
493 */
494 printf("%s%s", copyright, version);
495 format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
496 printf("total memory = %s\n", pbuf);
497
498 minaddr = 0;
499 /*
500 * Allocate a submap for physio.
501 */
502 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
503 VM_PHYS_SIZE, 0, FALSE, NULL);
504
505 format_bytes(pbuf, sizeof(pbuf), ptoa(uvm_availmem(false)));
506 printf("avail memory = %s\n", pbuf);
507
508 fdtbus_intr_init();
509 }
510
511 static void
512 riscv_add_memory(const struct fdt_memory *m, void *arg)
513 {
514 paddr_t first = atop(m->start);
515 paddr_t last = atop(m->end);
516 int freelist = VM_FREELIST_DEFAULT;
517
518 VPRINTF("adding %#16" PRIxPADDR " - %#16" PRIxPADDR" to freelist %d\n",
519 m->start, m->end, freelist);
520
521 uvm_page_physload(first, last, first, last, freelist);
522 physmem += last - first;
523 }
524
525
/*
 * Early kernel VM setup: remove the kernel image and the boot-time
 * __init_memory section from the FDT memory ranges so UVM never sees
 * them, and (on LP64) build direct-map entries covering RAM.
 */
static void
cpu_kernel_vm_init(paddr_t memory_start, paddr_t memory_end)
{
	extern char __kernel_text[];
	extern char _end[];

	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
	vaddr_t kernend = round_page((vaddr_t)_end);
	paddr_t kernstart_phys = KERN_VTOPHYS(kernstart);
	paddr_t kernend_phys = KERN_VTOPHYS(kernend);

	VPRINTF("%s: kernel phys start %#" PRIxPADDR " end %#" PRIxPADDR "\n",
	    __func__, kernstart_phys, kernend_phys);
	/* Keep the kernel image itself out of the UVM page pool. */
	fdt_memory_remove_range(kernstart_phys,
	     kernend_phys - kernstart_phys);

	/*
	 * Don't give these pages to UVM.
	 *
	 * cpu_kernel_vm_init need to create proper tables then the following
	 * will be true.
	 *
	 * Now we have APs started the pages used for stacks and L1PT can
	 * be given to uvm
	 */
	extern char const __start__init_memory[];
	extern char const __stop__init_memory[] __weak;
	if (__start__init_memory != __stop__init_memory) {
		const paddr_t spa = KERN_VTOPHYS((vaddr_t)__start__init_memory);
		const paddr_t epa = KERN_VTOPHYS((vaddr_t)__stop__init_memory);

		VPRINTF("%s: init phys start %#" PRIxPADDR
		    " end %#" PRIxPADDR "\n", __func__, spa, epa);
		fdt_memory_remove_range(spa, epa - spa);
	}

#ifdef _LP64
	/* Direct map: cover physical memory starting at an XSEG boundary. */
	paddr_t pa = memory_start & ~XSEGOFSET;
	pmap_direct_base = RISCV_DIRECTMAP_START;
	extern pd_entry_t l2_pte[PAGE_SIZE / sizeof(pd_entry_t)];


	const vsize_t vshift = XSEGSHIFT;
	const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
	const vsize_t inc = 1UL << vshift;

	/* Index range of the top-level PTEs spanned by the direct map. */
	const vaddr_t sva = RISCV_DIRECTMAP_START + pa;
	const vaddr_t eva = RISCV_DIRECTMAP_END;
	const size_t sidx = (sva >> vshift) & pdetab_mask;
	const size_t eidx = (eva >> vshift) & pdetab_mask;

	/* Allocate gigapages covering all physical memory in the direct map. */
	for (size_t i = sidx; i < eidx && pa < memory_end; i++, pa += inc) {
		l2_pte[i] = PA_TO_PTE(pa) | PTE_KERN | PTE_HARDWIRED | PTE_RW;
		VPRINTF("dm: %p : %#" PRIxPADDR "\n", &l2_pte[i], l2_pte[i]);
	}
#endif
	// pt_dump(printf);
}
585
586 static void
587 riscv_init_lwp0_uarea(void)
588 {
589 extern char lwp0uspace[];
590
591 uvm_lwp_setuarea(&lwp0, (vaddr_t)lwp0uspace);
592 memset(&lwp0.l_md, 0, sizeof(lwp0.l_md));
593 memset(lwp_getpcb(&lwp0), 0, sizeof(struct pcb));
594
595 struct trapframe *tf = (struct trapframe *)(lwp0uspace + USPACE) - 1;
596 memset(tf, 0, sizeof(*tf));
597
598 lwp0.l_md.md_utf = lwp0.l_md.md_ktf = tf;
599 }
600
601
/* fdt_memory_foreach() callback: log one FDT memory range (debug builds). */
static void
riscv_print_memory(const struct fdt_memory *m, void *arg)
{

	VPRINTF("FDT /memory @ 0x%" PRIx64 " size 0x%" PRIx64 "\n",
	    m->start, m->end - m->start);
}
609
610
611 static void
612 parse_mi_bootargs(char *args)
613 {
614 int howto;
615 bool found, start, skipping;
616
617 if (args == NULL)
618 return;
619
620 start = true;
621 skipping = false;
622 for (char *cp = args; *cp; cp++) {
623 /* check for "words" starting with a "-" only */
624 if (start) {
625 if (*cp == '-') {
626 skipping = false;
627 } else {
628 skipping = true;
629 }
630 start = false;
631 continue;
632 }
633
634 if (*cp == ' ') {
635 start = true;
636 skipping = false;
637 continue;
638 }
639
640 if (skipping) {
641 continue;
642 }
643
644 /* Check valid boot flags */
645 howto = 0;
646 BOOT_FLAG(*cp, howto);
647 if (!howto)
648 printf("bootflag '%c' not recognised\n", *cp);
649 else
650 boothowto |= howto;
651 }
652
653 found = optstr_get(args, "root", bootdevstr, sizeof(bootdevstr));
654 if (found) {
655 bootspec = bootdevstr;
656 }
657 }
658
659
660 void
661 init_riscv(register_t hartid, paddr_t dtb)
662 {
663
664 /* set temporally to work printf()/panic() even before consinit() */
665 cn_tab = &earlycons;
666
667 /* Load FDT */
668 const vaddr_t dtbva = VM_KERNEL_DTB_BASE + (dtb & (NBSEG - 1));
669 void *fdt_data = (void *)dtbva;
670 int error = fdt_check_header(fdt_data);
671 if (error != 0)
672 panic("fdt_check_header failed: %s", fdt_strerror(error));
673
674 fdtbus_init(fdt_data);
675
676 /* Lookup platform specific backend */
677 const struct fdt_platform *plat = fdt_platform_find();
678 if (plat == NULL)
679 panic("Kernel does not support this device");
680
681 /* Early console may be available, announce ourselves. */
682 VPRINTF("FDT<%p>\n", fdt_data);
683
684 const int chosen = OF_finddevice("/chosen");
685 if (chosen >= 0)
686 OF_getprop(chosen, "bootargs", bootargs, sizeof(bootargs));
687 boot_args = bootargs;
688
689 VPRINTF("devmap %p\n", plat->fp_devmap());
690 pmap_devmap_bootstrap(0, plat->fp_devmap());
691
692 VPRINTF("bootstrap\n");
693 plat->fp_bootstrap();
694
695 /*
696 * If stdout-path is specified on the command line, override the
697 * value in /chosen/stdout-path before initializing console.
698 */
699 VPRINTF("stdout\n");
700 fdt_update_stdout_path(fdt_data, boot_args);
701
702 /*
703 * Done making changes to the FDT.
704 */
705 fdt_pack(fdt_data);
706
707 const uint32_t dtbsize = round_page(fdt_totalsize(fdt_data));
708
709 VPRINTF("fdt size %x/%x\n", dtbsize, fdt_totalsize(fdt_data));
710
711 VPRINTF("consinit ");
712 consinit();
713 VPRINTF("ok\n");
714
715 /* Talk to the user */
716 printf("NetBSD/riscv (fdt) booting ...\n");
717
718 #ifdef BOOT_ARGS
719 char mi_bootargs[] = BOOT_ARGS;
720 parse_mi_bootargs(mi_bootargs);
721 #endif
722
723 uint64_t memory_start, memory_end;
724 fdt_memory_get(&memory_start, &memory_end);
725 physical_start = memory_start;
726 physical_end = memory_end;
727
728 fdt_memory_foreach(riscv_print_memory, NULL);
729
730 /* Cannot map memory above largest page number */
731 const uint64_t maxppn = __SHIFTOUT_MASK(PTE_PPN) - 1;
732 const uint64_t memory_limit = ptoa(maxppn);
733
734 if (memory_end > memory_limit) {
735 fdt_memory_remove_range(memory_limit, memory_end);
736 memory_end = memory_limit;
737 }
738
739 uint64_t memory_size __unused = memory_end - memory_start;
740
741 VPRINTF("%s: memory start %" PRIx64 " end %" PRIx64 " (len %"
742 PRIx64 ")\n", __func__, memory_start, memory_end, memory_size);
743
744 fdt_memory_remove_reserved(memory_start, memory_end);
745
746 fdt_memory_remove_range(dtb, dtb + dtbsize);
747
748 /* Perform PT build and VM init */
749 cpu_kernel_vm_init(memory_start, memory_end);
750
751 VPRINTF("bootargs: %s\n", bootargs);
752
753 parse_mi_bootargs(boot_args);
754
755 #ifdef DDB
756 if (boothowto & RB_KDB) {
757 printf("Entering DDB...\n");
758 cpu_Debugger();
759 }
760 #endif
761
762 extern char __kernel_text[];
763 extern char _end[];
764 // extern char __data_start[];
765 // extern char __rodata_start[];
766
767 vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
768 vaddr_t kernend = round_page((vaddr_t)_end);
769 paddr_t kernstart_phys __unused = KERN_VTOPHYS(kernstart);
770 paddr_t kernend_phys __unused = KERN_VTOPHYS(kernend);
771
772 vaddr_t kernelvmstart;
773
774 vaddr_t kernstart_mega __unused = MEGAPAGE_TRUNC(kernstart);
775 vaddr_t kernend_mega = MEGAPAGE_ROUND(kernend);
776
777 kernelvmstart = kernend_mega;
778
779 #if 0
780 #ifdef MODULAR
781 #define MODULE_RESERVED_MAX (1024 * 1024 * 128)
782 #define MODULE_RESERVED_SIZE (1024 * 1024 * 32) /* good enough? */
783 module_start = kernelvmstart;
784 module_end = kernend_mega + MODULE_RESERVED_SIZE;
785 if (module_end >= kernstart_mega + MODULE_RESERVED_MAX)
786 module_end = kernstart_mega + MODULE_RESERVED_MAX;
787 KASSERT(module_end > kernend_mega);
788 kernelvmstart = module_end;
789 #endif /* MODULAR */
790 #endif
791 KASSERT(kernelvmstart < VM_KERNEL_VM_BASE);
792
793 kernelvmstart = VM_KERNEL_VM_BASE;
794
795 /*
796 * msgbuf is allocated from the top of the last biggest memory block.
797 */
798 paddr_t msgbufaddr = 0;
799
800 #ifdef _LP64
801 /* XXX check all ranges for last one with a big enough hole */
802 msgbufaddr = memory_end - MSGBUFSIZE;
803 KASSERT(msgbufaddr != 0); /* no space for msgbuf */
804 fdt_memory_remove_range(msgbufaddr, msgbufaddr + MSGBUFSIZE);
805 msgbufaddr = RISCV_PA_TO_KVA(msgbufaddr);
806 VPRINTF("msgbufaddr = %#lx\n", msgbufaddr);
807 initmsgbuf((void *)msgbufaddr, MSGBUFSIZE);
808 #endif
809
810 KASSERT(msgbufaddr != 0); /* no space for msgbuf */
811 #ifdef _LP64
812 initmsgbuf((void *)RISCV_PA_TO_KVA(msgbufaddr), MSGBUFSIZE);
813 #endif
814
815 #define DPRINTF(v) VPRINTF("%24s = 0x%16lx\n", #v, (unsigned long)v);
816
817 VPRINTF("------------------------------------------\n");
818 DPRINTF(kern_vtopdiff);
819 DPRINTF(memory_start);
820 DPRINTF(memory_end);
821 DPRINTF(memory_size);
822 DPRINTF(kernstart_phys);
823 DPRINTF(kernend_phys)
824 DPRINTF(msgbufaddr);
825 // DPRINTF(physical_end);
826 DPRINTF(VM_MIN_KERNEL_ADDRESS);
827 DPRINTF(kernstart_mega);
828 DPRINTF(kernstart);
829 DPRINTF(kernend);
830 DPRINTF(kernend_mega);
831 #if 0
832 #ifdef MODULAR
833 DPRINTF(module_start);
834 DPRINTF(module_end);
835 #endif
836 #endif
837 DPRINTF(VM_MAX_KERNEL_ADDRESS);
838 #ifdef _LP64
839 DPRINTF(pmap_direct_base);
840 #endif
841 VPRINTF("------------------------------------------\n");
842
843 #undef DPRINTF
844
845 uvm_md_init();
846
847 /*
848 * pass memory pages to uvm
849 */
850 physmem = 0;
851 fdt_memory_foreach(riscv_add_memory, NULL);
852
853 pmap_bootstrap(kernelvmstart, VM_MAX_KERNEL_ADDRESS);
854
855 kasan_init();
856
857 /* Finish setting up lwp0 on our end before we call main() */
858 riscv_init_lwp0_uarea();
859 }
860
861
862 #ifdef _LP64
863 static void
864 pte_bits(void (*pr)(const char *, ...), pt_entry_t pte)
865 {
866 (*pr)("%c%c%c%c%c%c%c%c",
867 (pte & PTE_D) ? 'D' : '.',
868 (pte & PTE_A) ? 'A' : '.',
869 (pte & PTE_G) ? 'G' : '.',
870 (pte & PTE_U) ? 'U' : '.',
871 (pte & PTE_X) ? 'X' : '.',
872 (pte & PTE_W) ? 'W' : '.',
873 (pte & PTE_R) ? 'R' : '.',
874 (pte & PTE_V) ? 'V' : '.');
875 }
876
/*
 * Recursively dump the page table page at physical address pdp_pa.
 * `level` counts down to 0 at the leaf level.  `topbit` is non-zero
 * only for the root table: it names the VA sign bit for the current
 * translation mode so canonical (sign-extended) VAs can be shown.
 */
static void
dump_ln_table(paddr_t pdp_pa, int topbit, int level, vaddr_t va,
    void (*pr)(const char *, ...) __printflike(1, 2))
{
	pd_entry_t *pdp = (void *)PMAP_DIRECT_MAP(pdp_pa);

	(*pr)("l%u @ pa %#16" PRIxREGISTER "\n", level, pdp_pa);
	for (size_t i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
		pd_entry_t entry = pdp[i];

		if (topbit) {
			/* Root table: derive the VA from the index and
			 * sign-extend entries in the upper half. */
			va = i << (PGSHIFT + level * SEGLENGTH);
			if (va & __BIT(topbit)) {
				va |= __BITS(63, topbit);
			}
		}
		if (entry != 0) {
			paddr_t pa = __SHIFTOUT(entry, PTE_PPN) << PGSHIFT;
			// check level PPN bits.
			if (PTE_ISLEAF_P(entry)) {
				(*pr)("l%u %3zu va 0x%016lx pa 0x%012lx - ",
				    level, i, va, pa);
				pte_bits(pr, entry);
				(*pr)("\n");
			} else {
				(*pr)("l%u %3zu va 0x%016lx -> 0x%012lx - ",
				    level, i, va, pa);
				pte_bits(pr, entry);
				(*pr)("\n");
				/* A non-leaf entry at level 0 is corrupt. */
				if (level == 0) {
					(*pr)("wtf\n");
					continue;
				}
				if (pte_pde_valid_p(entry))
					dump_ln_table(pa, 0, level - 1, va, pr);
			}
		}
		va += 1UL << (PGSHIFT + level * SEGLENGTH);
	}
}
917
918 #endif
919
/*
 * Dump the live page-table hierarchy (rooted at SATP) through `pr`,
 * typically ddb's printf.
 */
void
pt_dump(void (*pr)(const char *, ...) __printflike(1, 2))
{
	const register_t satp = csr_satp_read();
	size_t topbit = sizeof(long) * NBBY - 1;

#ifdef _LP64
	const paddr_t satp_pa = __SHIFTOUT(satp, SATP_PPN) << PGSHIFT;
	const uint8_t mode = __SHIFTOUT(satp, SATP_MODE);
	u_int level = 1;

	switch (mode) {
	case SATP_MODE_SV39:
	case SATP_MODE_SV48:
		/*
		 * Sv39 VAs are 39 bits wide and each deeper mode adds
		 * SEGLENGTH bits.  NOTE(review): the arithmetic assumes
		 * SATP_MODE_SV39 == 8 — verify against <riscv/pte.h>.
		 */
		topbit = (39 - 1) + (mode - 8) * SEGLENGTH;
		level = mode - 6;
		break;
	}
#endif
	(*pr)("topbit = %zu\n", topbit);

	(*pr)("satp = 0x%" PRIxREGISTER "\n", satp);
#ifdef _LP64
	dump_ln_table(satp_pa, topbit, level, 0, pr);
#endif
}
946
947 void
948 consinit(void)
949 {
950 static bool initialized = false;
951 const struct fdt_console *cons = fdtbus_get_console();
952 const struct fdt_platform *plat = fdt_platform_find();
953
954 if (initialized || cons == NULL)
955 return;
956
957 u_int uart_freq = 0;
958 extern struct bus_space riscv_generic_bs_tag;
959 struct fdt_attach_args faa = {
960 .faa_bst = &riscv_generic_bs_tag,
961 };
962
963 faa.faa_phandle = fdtbus_get_stdout_phandle();
964 if (plat->fp_uart_freq != NULL)
965 uart_freq = plat->fp_uart_freq();
966
967 cons->consinit(&faa, uart_freq);
968
969 initialized = true;
970 }
971