Home | History | Annotate | Line # | Download | only in riscv
      1 /*	$NetBSD: riscv_machdep.c,v 1.46 2025/10/12 04:08:26 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2014, 2019, 2022 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Matt Thomas of 3am Software Foundry, and by Nick Hudson.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include "opt_ddb.h"
     33 #include "opt_modular.h"
     34 #include "opt_multiprocessor.h"
     35 #include "opt_riscv_debug.h"
     36 
     37 #include <sys/cdefs.h>
     38 __RCSID("$NetBSD: riscv_machdep.c,v 1.46 2025/10/12 04:08:26 thorpej Exp $");
     39 
     40 #include <sys/param.h>
     41 
     42 #include <sys/asan.h>
     43 #include <sys/boot_flag.h>
     44 #include <sys/cpu.h>
     45 #include <sys/exec.h>
     46 #include <sys/kmem.h>
     47 #include <sys/ktrace.h>
     48 #include <sys/lwp.h>
     49 #include <sys/module.h>
     50 #include <sys/mount.h>
     51 #include <sys/msgbuf.h>
     52 #include <sys/optstr.h>
     53 #include <sys/proc.h>
     54 #include <sys/reboot.h>
     55 #include <sys/syscall.h>
     56 #include <sys/sysctl.h>
     57 #include <sys/systm.h>
     58 
     59 #include <dev/cons.h>
     60 #ifdef __HAVE_MM_MD_KERNACC
     61 #include <dev/mm.h>
     62 #endif
     63 
     64 #include <uvm/uvm_extern.h>
     65 
     66 #include <riscv/frame.h>
     67 #include <riscv/locore.h>
     68 #include <riscv/machdep.h>
     69 #include <riscv/pte.h>
     70 #include <riscv/sbi.h>
     71 #include <riscv/userret.h>
     72 
     73 #include <libfdt.h>
     74 #include <dev/fdt/fdtvar.h>
     75 #include <dev/fdt/fdt_boot.h>
     76 #include <dev/fdt/fdt_console.h>
     77 #include <dev/fdt/fdt_memory.h>
     78 #include <dev/fdt/fdt_private.h>
     79 #include <dev/fdt/fdt_platform.h>
     80 
     81 int cpu_printfataltraps = 1;
     82 char machine[] = MACHINE;
     83 char machine_arch[] = MACHINE_ARCH;
     84 
     85 #ifdef VERBOSE_INIT_RISCV
     86 #define	VPRINTF(...)	printf(__VA_ARGS__)
     87 #else
     88 #define	VPRINTF(...)	__nothing
     89 #endif
     90 
     91 /* 64 should be enough, even for a ZFS UUID */
     92 #define	MAX_BOOT_DEV_STR	64
     93 
     94 char bootdevstr[MAX_BOOT_DEV_STR] = "";
     95 char *boot_args = NULL;
     96 
     97 paddr_t physical_start;
     98 paddr_t physical_end;
     99 
    100 static void
    101 earlyconsputc(dev_t dev, int c)
    102 {
    103 	uartputc(c);
    104 }
    105 
    106 static int
    107 earlyconsgetc(dev_t dev)
    108 {
    109 	return uartgetc();
    110 }
    111 
    112 static struct consdev earlycons = {
    113 	.cn_putc = earlyconsputc,
    114 	.cn_getc = earlyconsgetc,
    115 	.cn_pollc = nullcnpollc,
    116 };
    117 
    118 struct vm_map *phys_map;
    119 
    120 struct trapframe cpu_ddb_regs;
    121 const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
    122 #ifdef FPE
    123 	[PCU_FPU] = &pcu_fpu_ops,
    124 #endif
    125 };
    126 
    127 /*
    128  * Used by PHYSTOV and VTOPHYS -- Will be set be BSS is zeroed so
    129  * keep it in data
    130  */
    131 unsigned long kern_vtopdiff __attribute__((__section__(".data")));
    132 
    133 
    134 /*
    135  * machine dependent system variables.
    136  */
    137 SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
    138 {
    139 	sysctl_createv(clog, 0, NULL, NULL,
    140 	    CTLFLAG_PERMANENT,
    141 	    CTLTYPE_NODE, "machdep", NULL,
    142 	    NULL, 0, NULL, 0,
    143 	    CTL_MACHDEP, CTL_EOL);
    144 }
    145 
#if defined(MODULAR)
/*
 * Hook for pushing modules loaded by the boot loader.
 * Currently a no-op on this port.
 */
void
module_init_md(void)
{

	/* Nothing to do. */
}
#endif /* MODULAR */
    155 
    156 /*
    157  * Set registers on exec.
    158  * Clear all registers except sp, pc.
    159  * sp is set to the stack pointer passed in.  pc is set to the entry
    160  * point given by the exec_package passed in.
    161  */
    162 void
    163 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
    164 {
    165 	struct trapframe * const tf = l->l_md.md_utf;
    166 	struct proc * const p = l->l_proc;
    167 
    168 	memset(tf, 0, sizeof(*tf));
    169 	tf->tf_sp = (intptr_t)stack_align(stack);
    170 	tf->tf_pc = (intptr_t)pack->ep_entry & ~1;
    171 #ifdef _LP64
    172 	tf->tf_sr = (p->p_flag & PK_32) ? SR_USER32 : SR_USER64;
    173 #else
    174 	tf->tf_sr = SR_USER;
    175 #endif
    176 
    177 	// Set up arguments for ___start(cleanup, ps_strings)
    178 	tf->tf_a0 = 0;			// cleanup
    179 	tf->tf_a1 = p->p_psstrp;	// ps_strings
    180 
    181 	/*
    182 	 * Must have interrupts disabled for exception return.
    183 	 * Must be switching to user mode.
    184 	 * Must enable interrupts after sret.
    185 	 */
    186 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
    187 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
    188 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
    189 }
    190 
    191 void
    192 md_child_return(struct lwp *l)
    193 {
    194 	struct trapframe * const tf = lwp_trapframe(l);
    195 
    196 	tf->tf_a0 = 0;
    197 	tf->tf_a1 = 1;
    198 #ifdef FPE
    199 	/* Disable FP as we can't be using it (yet). */
    200 	tf->tf_sr &= ~SR_FS;
    201 #endif
    202 
    203 	/*
    204 	 * Must have interrupts disabled for exception return.
    205 	 * Must be switching to user mode.
    206 	 * Must enable interrupts after sret.
    207 	 */
    208 
    209 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
    210 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
    211 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
    212 
    213 	userret(l);
    214 }
    215 
/*
 * Tail end of posix_spawn() in the child: nothing machine dependent to
 * fix up, just perform the normal return-to-user processing.
 */
void
cpu_spawn_return(struct lwp *l)
{

	userret(l);
}
    224 
    225 /*
    226  * Start a new LWP
    227  */
    228 void
    229 startlwp(void *arg)
    230 {
    231 	ucontext_t * const uc = arg;
    232 	lwp_t * const l = curlwp;
    233 	int error __diagused;
    234 
    235 	error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
    236 	KASSERT(error == 0);
    237 
    238 	kmem_free(uc, sizeof(*uc));
    239 	userret(l);
    240 }
    241 
    242 // We've worked hard to make sure struct reg and __gregset_t are the same.
    243 // Ditto for struct fpreg and fregset_t.
    244 
    245 #ifdef _LP64
    246 CTASSERT(sizeof(struct reg) == sizeof(__gregset_t));
    247 #endif
    248 CTASSERT(sizeof(struct fpreg) == sizeof(__fregset_t));
    249 
    250 void
    251 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
    252 {
    253 	const struct trapframe * const tf = l->l_md.md_utf;
    254 
    255 	/* Save register context. */
    256 	*(struct reg *)mcp->__gregs = tf->tf_regs;
    257 
    258 	*flags |= _UC_CPU | _UC_TLSBASE;
    259 
    260 	/* Save floating point register context, if any. */
    261 	KASSERT(l == curlwp);
    262 	if (fpu_valid_p(l)) {
    263 		/*
    264 		 * If this process is the current FP owner, dump its
    265 		 * context to the PCB first.
    266 		 */
    267 		fpu_save(l);
    268 
    269 		struct pcb * const pcb = lwp_getpcb(l);
    270 		*(struct fpreg *)mcp->__fregs = pcb->pcb_fpregs;
    271 		*flags |= _UC_FPU;
    272 	}
    273 }
    274 
    275 int
    276 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
    277 {
    278 	/*
    279 	 * Verify that at least the PC and SP are user addresses.
    280 	 */
    281 	if ((intptr_t) mcp->__gregs[_REG_PC] < 0
    282 	    || (intptr_t) mcp->__gregs[_REG_SP] < 0
    283 	    || (mcp->__gregs[_REG_PC] & 1))
    284 		return EINVAL;
    285 
    286 	return 0;
    287 }
    288 
    289 int
    290 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
    291 {
    292 	struct trapframe * const tf = l->l_md.md_utf;
    293 	struct proc * const p = l->l_proc;
    294 	const __greg_t * const gr = mcp->__gregs;
    295 	int error;
    296 
    297 	/* Restore register context, if any. */
    298 	if (flags & _UC_CPU) {
    299 		error = cpu_mcontext_validate(l, mcp);
    300 		if (error)
    301 			return error;
    302 
    303 		/*
    304 		 * Avoid updating TLS register here.
    305 		 */
    306 		const __greg_t saved_tp = tf->tf_reg[_REG_TP];
    307 		tf->tf_regs = *(const struct reg *)gr;
    308 		tf->tf_reg[_REG_TP] = saved_tp;
    309 	}
    310 
    311 	/* Restore the private thread context */
    312 	if (flags & _UC_TLSBASE) {
    313 		lwp_setprivate(l, (void *)(intptr_t)mcp->__gregs[_X_TP]);
    314 	}
    315 
    316 	/* Restore floating point register context, if any. */
    317 	if (flags & _UC_FPU) {
    318 		KASSERT(l == curlwp);
    319 		/* Tell PCU we are replacing the FPU contents. */
    320 		fpu_replace(l);
    321 
    322 		/*
    323 		 * The PCB FP regs struct includes the FP CSR, so use the
    324 		 * proper size of fpreg when copying.
    325 		 */
    326 		struct pcb * const pcb = lwp_getpcb(l);
    327 		pcb->pcb_fpregs = *(const struct fpreg *)mcp->__fregs;
    328 	}
    329 
    330 	mutex_enter(p->p_lock);
    331 	if (flags & _UC_SETSTACK)
    332 		l->l_sigstk.ss_flags |= SS_ONSTACK;
    333 	if (flags & _UC_CLRSTACK)
    334 		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
    335 	mutex_exit(p->p_lock);
    336 
    337 	return 0;
    338 }
    339 
    340 void
    341 cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
    342 {
    343 	KASSERT(kpreempt_disabled());
    344 
    345 	if ((flags & RESCHED_KPREEMPT) != 0) {
    346 #ifdef __HAVE_PREEMPTION
    347 		if ((flags & RESCHED_REMOTE) != 0) {
    348 			cpu_send_ipi(ci, IPI_KPREEMPT);
    349 		} else {
    350 			softint_trigger(SOFTINT_KPREEMPT);
    351 		}
    352 #endif
    353 		return;
    354 	}
    355 	if ((flags & RESCHED_REMOTE) != 0) {
    356 #ifdef MULTIPROCESSOR
    357 		cpu_send_ipi(ci, IPI_AST);
    358 #endif
    359 	} else {
    360 		l->l_md.md_astpending = 1;	/* force call to ast() */
    361 	}
    362 }
    363 
    364 void
    365 cpu_signotify(struct lwp *l)
    366 {
    367 	KASSERT(kpreempt_disabled());
    368 #ifdef __HAVE_FAST_SOFTINTS
    369 	KASSERT(lwp_locked(l, NULL));
    370 #endif
    371 
    372 	if (l->l_cpu != curcpu()) {
    373 #ifdef MULTIPROCESSOR
    374 		cpu_send_ipi(l->l_cpu, IPI_AST);
    375 #endif
    376 	} else {
    377 		l->l_md.md_astpending = 1; 	/* force call to ast() */
    378 	}
    379 }
    380 
    381 void
    382 cpu_need_proftick(struct lwp *l)
    383 {
    384 	KASSERT(kpreempt_disabled());
    385 	KASSERT(l->l_cpu == curcpu());
    386 
    387 	l->l_pflag |= LP_OWEUPC;
    388 	l->l_md.md_astpending = 1;		/* force call to ast() */
    389 }
    390 
    391 
    392 /* Sync the discs, unmount the filesystems, and adjust the todr */
    393 static void
    394 bootsync(void)
    395 {
    396 	static bool bootsyncdone = false;
    397 
    398 	if (bootsyncdone)
    399 		return;
    400 
    401 	bootsyncdone = true;
    402 
    403 	/* Make sure we can still manage to do things */
    404 	if ((csr_sstatus_read() & SR_SIE) == 0) {
    405 		/*
    406 		 * If we get here then boot has been called without RB_NOSYNC
    407 		 * and interrupts were disabled. This means the boot() call
    408 		 * did not come from a user process e.g. shutdown, but must
    409 		 * have come from somewhere in the kernel.
    410 		 */
    411 		ENABLE_INTERRUPTS();
    412 		printf("Warning interrupts disabled during boot()\n");
    413 	}
    414 
    415 	vfs_shutdown();
    416 }
    417 
    418 
    419 void
    420 cpu_reboot(int howto, char *bootstr)
    421 {
    422 
    423 	/*
    424 	 * If RB_NOSYNC was not specified sync the discs.
    425 	 * Note: Unless cold is set to 1 here, syslogd will die during the
    426 	 * unmount.  It looks like syslogd is getting woken up only to find
    427 	 * that it cannot page part of the binary in as the filesystem has
    428 	 * been unmounted.
    429 	 */
    430 	if ((howto & RB_NOSYNC) == 0)
    431 		bootsync();
    432 
    433 #if 0
    434 	/* Disable interrupts. */
    435 	const int s = splhigh();
    436 
    437 	/* Do a dump if requested. */
    438 	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
    439 		dumpsys();
    440 
    441 	splx(s);
    442 #endif
    443 
    444 	pmf_system_shutdown(boothowto);
    445 
    446 	/* Say NO to interrupts for good */
    447 	splhigh();
    448 
    449 	/* Run any shutdown hooks */
    450 	doshutdownhooks();
    451 
    452 	/* Make sure IRQ's are disabled */
    453 	DISABLE_INTERRUPTS();
    454 
    455 	if (howto & RB_HALT) {
    456 		printf("\n");
    457 		printf("The operating system has halted.\n");
    458 		printf("Please press any key to reboot.\n\n");
    459 		cnpollc(true);	/* for proper keyboard command handling */
    460 		if (cngetc() == 0) {
    461 			/* no console attached, so just hlt */
    462 			printf("No keyboard - cannot reboot after all.\n");
    463 			goto spin;
    464 		}
    465 		cnpollc(false);
    466 	}
    467 
    468 	printf("rebooting...\n");
    469 
    470 	sbi_system_reset(SBI_RESET_TYPE_COLDREBOOT, SBI_RESET_REASON_NONE);
    471 spin:
    472 	for (;;) {
    473 		asm volatile("wfi" ::: "memory");
    474 	}
    475 	/* NOTREACHED */
    476 }
    477 
/*
 * Crash dump configuration hook.  Not implemented yet.
 */
void
cpu_dumpconf(void)
{

	/* TBD!! */
}
    483 
    484 
    485 int
    486 cpu_lwp_setprivate(lwp_t *l, void *addr)
    487 {
    488 	struct trapframe * const tf = lwp_trapframe(l);
    489 
    490 	tf->tf_reg[_REG_TP] = (register_t)addr;
    491 
    492 	return 0;
    493 }
    494 
    495 
    496 void
    497 cpu_startup(void)
    498 {
    499 	vaddr_t minaddr, maxaddr;
    500 	char pbuf[10];	/* "999999 MB" -- But Sv39 is max 512GB */
    501 
    502 	/*
    503 	 * Good {morning,afternoon,evening,night}.
    504 	 */
    505 	printf("%s%s", copyright, version);
    506 	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
    507 	printf("total memory = %s\n", pbuf);
    508 
    509 	minaddr = 0;
    510 	/*
    511 	 * Allocate a submap for physio.
    512 	 */
    513 	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
    514 	    VM_PHYS_SIZE, 0, FALSE, NULL);
    515 
    516 	format_bytes(pbuf, sizeof(pbuf), ptoa(uvm_availmem(false)));
    517 	printf("avail memory = %s\n", pbuf);
    518 
    519 #ifdef MULTIPROCESSOR
    520 	kcpuset_create(&cpus_halted, true);
    521 	KASSERT(cpus_halted != NULL);
    522 
    523 	kcpuset_create(&cpus_hatched, true);
    524 	KASSERT(cpus_hatched != NULL);
    525 
    526 	kcpuset_create(&cpus_paused, true);
    527 	KASSERT(cpus_paused != NULL);
    528 
    529 	kcpuset_create(&cpus_resumed, true);
    530 	KASSERT(cpus_resumed != NULL);
    531 
    532 	kcpuset_create(&cpus_running, true);
    533 	KASSERT(cpus_running != NULL);
    534 
    535 	kcpuset_set(cpus_hatched, cpu_index(curcpu()));
    536 	kcpuset_set(cpus_running, cpu_index(curcpu()));
    537 #endif
    538 
    539 	fdtbus_intr_init();
    540 
    541 	fdt_setup_rndseed();
    542 	fdt_setup_efirng();
    543 }
    544 
    545 static void
    546 riscv_add_memory(const struct fdt_memory *m, void *arg)
    547 {
    548 	paddr_t first = atop(m->start);
    549 	paddr_t last = atop(m->end);
    550 	int freelist = VM_FREELIST_DEFAULT;
    551 
    552 	VPRINTF("adding %#16" PRIxPADDR " - %#16" PRIxPADDR"  to freelist %d\n",
    553 	    m->start, m->end, freelist);
    554 
    555 	uvm_page_physload(first, last, first, last, freelist);
    556 	physmem += last - first;
    557 }
    558 
    559 
    560 static void
    561 cpu_kernel_vm_init(paddr_t memory_start, paddr_t memory_end)
    562 {
    563 	extern char __kernel_text[];
    564 	extern char _end[];
    565 
    566 	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
    567 	vaddr_t kernend = round_page((vaddr_t)_end);
    568 	paddr_t kernstart_phys = KERN_VTOPHYS(kernstart);
    569 	paddr_t kernend_phys = KERN_VTOPHYS(kernend);
    570 
    571 	VPRINTF("%s: kernel phys start %#" PRIxPADDR " end %#" PRIxPADDR "\n",
    572 	    __func__, kernstart_phys, kernend_phys);
    573 	fdt_memory_remove_range(kernstart_phys,
    574 	    kernend_phys - kernstart_phys);
    575 
    576 #if 0
    577 	/*
    578 	 * Don't give these pages to UVM.
    579 	 *
    580 	 * cpu_kernel_vm_init need to create proper tables then the following
    581 	 * will be true.
    582 	 *
    583 	 * Now we have APs started the pages used for stacks and L1PT can
    584 	 * be given to uvm
    585 	 */
    586 	extern char const __start__init_memory[];
    587 	extern char const __stop__init_memory[] __weak;
    588 	if (&__start__init_memory[0] != &__stop__init_memory[0]) {
    589 		const paddr_t spa = KERN_VTOPHYS((vaddr_t)__start__init_memory);
    590 		const paddr_t epa = KERN_VTOPHYS((vaddr_t)__stop__init_memory);
    591 
    592 		VPRINTF("%s: init   phys start %#" PRIxPADDR
    593 		    " end %#" PRIxPADDR "\n", __func__, spa, epa);
    594 		fdt_memory_remove_range(spa, epa - spa);
    595 	}
    596 #endif
    597 
    598 #ifdef _LP64
    599 	paddr_t pa = memory_start & ~XSEGOFSET;
    600 	pmap_direct_base = RISCV_DIRECTMAP_START;
    601 	extern pd_entry_t l2_pte[PAGE_SIZE / sizeof(pd_entry_t)];
    602 
    603 
    604 	const vsize_t vshift = XSEGSHIFT;
    605 	const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
    606 	const vsize_t inc = 1UL << vshift;
    607 
    608 	const vaddr_t sva = RISCV_DIRECTMAP_START + pa;
    609 	const vaddr_t eva = RISCV_DIRECTMAP_END;
    610 	const size_t sidx = (sva >> vshift) & pdetab_mask;
    611 	const size_t eidx = (eva >> vshift) & pdetab_mask;
    612 
    613 	/* Allocate gigapages covering all physical memory in the direct map. */
    614 	pt_entry_t pbmt_flag = pte_enter_flags_to_pbmt(0);
    615 	for (size_t i = sidx; i < eidx && pa < memory_end; i++, pa += inc) {
    616 		l2_pte[i] = PA_TO_PTE(pa) | PTE_KERN | PTE_HARDWIRED | PTE_RW |
    617 		    pbmt_flag;
    618 		VPRINTF("dm:   %p :  %#" PRIxPADDR "\n", &l2_pte[i], l2_pte[i]);
    619 	}
    620 #endif
    621 //	pt_dump(printf);
    622 }
    623 
    624 static void
    625 riscv_init_lwp0_uarea(void)
    626 {
    627 	extern char lwp0uspace[];
    628 
    629 	uvm_lwp_setuarea(&lwp0, (vaddr_t)lwp0uspace);
    630 	memset(&lwp0.l_md, 0, sizeof(lwp0.l_md));
    631 	memset(lwp_getpcb(&lwp0), 0, sizeof(struct pcb));
    632 
    633 	struct trapframe *tf = (struct trapframe *)(lwp0uspace + USPACE) - 1;
    634 	memset(tf, 0, sizeof(*tf));
    635 
    636 	lwp0.l_md.md_utf = lwp0.l_md.md_ktf = tf;
    637 }
    638 
    639 
    640 static void
    641 riscv_print_memory(const struct fdt_memory *m, void *arg)
    642 {
    643 
    644 	VPRINTF("FDT /memory @ 0x%" PRIx64 " size 0x%" PRIx64 "\n",
    645 	    m->start, m->end - m->start);
    646 }
    647 
    648 
    649 static void
    650 parse_mi_bootargs(char *args)
    651 {
    652 	int howto;
    653 	bool found, start, skipping;
    654 
    655 	if (args == NULL)
    656 		return;
    657 
    658 	start = true;
    659 	skipping = false;
    660 	for (char *cp = args; *cp; cp++) {
    661 		/* check for "words" starting with a "-" only */
    662 		if (start) {
    663 			if (*cp == '-') {
    664 				skipping = false;
    665 			} else {
    666 				skipping = true;
    667 			}
    668 			start = false;
    669 			continue;
    670 		}
    671 
    672 		if (*cp == ' ') {
    673 			start = true;
    674 			skipping = false;
    675 			continue;
    676 		}
    677 
    678 		if (skipping) {
    679 			continue;
    680 		}
    681 
    682 		/* Check valid boot flags */
    683 		howto = 0;
    684 		BOOT_FLAG(*cp, howto);
    685 		if (!howto)
    686 			printf("bootflag '%c' not recognised\n", *cp);
    687 		else
    688 			boothowto |= howto;
    689 	}
    690 
    691 	found = optstr_get(args, "root", bootdevstr, sizeof(bootdevstr));
    692 	if (found) {
    693 		bootspec = bootdevstr;
    694 	}
    695 }
    696 
    697 
    698 void
    699 init_riscv(register_t hartid, paddr_t dtb)
    700 {
    701 
    702 	/* set temporally to work printf()/panic() even before consinit() */
    703 	cn_tab = &earlycons;
    704 
    705 	/* Critical to do this before mucking around with any more mappings. */
    706 	pmap_probe_pbmt();
    707 
    708 	/* Load FDT */
    709 	const vaddr_t dtbva = VM_KERNEL_DTB_BASE + (dtb & (NBSEG - 1));
    710 	void *fdt_data = (void *)dtbva;
    711 	int error = fdt_check_header(fdt_data);
    712 	if (error != 0)
    713 	    panic("fdt_check_header failed: %s", fdt_strerror(error));
    714 
    715 	fdtbus_init(fdt_data);
    716 
    717 	/* Lookup platform specific backend */
    718 	const struct fdt_platform * const plat = fdt_platform_find();
    719 	if (plat == NULL)
    720 		panic("Kernel does not support this device");
    721 
    722 	/* Early console may be available, announce ourselves. */
    723 	VPRINTF("FDT<%p>\n", fdt_data);
    724 
    725 	boot_args = fdt_get_bootargs();
    726 
    727 	VPRINTF("devmap %p\n", plat->fp_devmap());
    728 	pmap_devmap_bootstrap(0, plat->fp_devmap());
    729 
    730 	VPRINTF("bootstrap\n");
    731 	plat->fp_bootstrap();
    732 
    733 	/*
    734 	 * If stdout-path is specified on the command line, override the
    735 	 * value in /chosen/stdout-path before initializing console.
    736 	 */
    737 	VPRINTF("stdout\n");
    738 	fdt_update_stdout_path(fdt_data, boot_args);
    739 
    740 	/*
    741 	 * Done making changes to the FDT.
    742 	 */
    743 	fdt_pack(fdt_data);
    744 
    745 	const uint32_t dtbsize = round_page(fdt_totalsize(fdt_data));
    746 
    747 	VPRINTF("fdt size %x/%x\n", dtbsize, fdt_totalsize(fdt_data));
    748 
    749 	VPRINTF("consinit ");
    750 	consinit();
    751 	VPRINTF("ok\n");
    752 
    753 	/* Talk to the user */
    754 	printf("NetBSD/riscv (fdt) booting ...\n");
    755 
    756 #ifdef BOOT_ARGS
    757 	char mi_bootargs[] = BOOT_ARGS;
    758 	parse_mi_bootargs(mi_bootargs);
    759 #endif
    760 
    761 	uint64_t memory_start, memory_end;
    762 	fdt_memory_get(&memory_start, &memory_end);
    763 	physical_start = memory_start;
    764 	physical_end = memory_end;
    765 
    766 	fdt_memory_foreach(riscv_print_memory, NULL);
    767 
    768 	/* Cannot map memory above largest page number */
    769 	const uint64_t maxppn = __SHIFTOUT_MASK(PTE_PPN) - 1;
    770 	const uint64_t memory_limit = ptoa(maxppn);
    771 
    772 	if (memory_end > memory_limit) {
    773 		fdt_memory_remove_range(memory_limit, memory_end);
    774 		memory_end = memory_limit;
    775 	}
    776 
    777 	uint64_t memory_size __unused = memory_end - memory_start;
    778 
    779 	VPRINTF("%s: memory start %" PRIx64 " end %" PRIx64 " (len %"
    780 	    PRIx64 ")\n", __func__, memory_start, memory_end, memory_size);
    781 
    782 	/* Parse ramdisk, rndseed, and firmware's RNG from EFI */
    783 	fdt_probe_initrd();
    784 	fdt_probe_rndseed();
    785 	fdt_probe_efirng();
    786 
    787 	fdt_memory_remove_reserved(memory_start, memory_end);
    788 
    789 	fdt_memory_remove_range(dtb, dtbsize);
    790 	fdt_reserve_initrd();
    791 	fdt_reserve_rndseed();
    792 	fdt_reserve_efirng();
    793 
    794 	/* Perform PT build and VM init */
    795 	cpu_kernel_vm_init(memory_start, memory_end);
    796 
    797 	VPRINTF("bootargs: %s\n", boot_args);
    798 
    799 	parse_mi_bootargs(boot_args);
    800 
    801 #ifdef DDB
    802 	if (boothowto & RB_KDB) {
    803 		printf("Entering DDB...\n");
    804 		cpu_Debugger();
    805 	}
    806 #endif
    807 
    808 	extern char __kernel_text[];
    809 	extern char _end[];
    810 //	extern char __data_start[];
    811 //	extern char __rodata_start[];
    812 
    813 	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
    814 	vaddr_t kernend = round_page((vaddr_t)_end);
    815 	paddr_t kernstart_phys __unused = KERN_VTOPHYS(kernstart);
    816 	paddr_t kernend_phys __unused = KERN_VTOPHYS(kernend);
    817 
    818 	vaddr_t kernelvmstart;
    819 
    820 	vaddr_t kernstart_mega __unused = MEGAPAGE_TRUNC(kernstart);
    821 	vaddr_t kernend_mega = MEGAPAGE_ROUND(kernend);
    822 
    823 	kernelvmstart = kernend_mega;
    824 
    825 #if 0
    826 #ifdef MODULAR
    827 #define MODULE_RESERVED_MAX	(1024 * 1024 * 128)
    828 #define MODULE_RESERVED_SIZE	(1024 * 1024 * 32)	/* good enough? */
    829 	module_start = kernelvmstart;
    830 	module_end = kernend_mega + MODULE_RESERVED_SIZE;
    831 	if (module_end >= kernstart_mega + MODULE_RESERVED_MAX)
    832 		module_end = kernstart_mega + MODULE_RESERVED_MAX;
    833 	KASSERT(module_end > kernend_mega);
    834 	kernelvmstart = module_end;
    835 #endif /* MODULAR */
    836 #endif
    837 	KASSERT(kernelvmstart < VM_KERNEL_VM_BASE);
    838 
    839 	kernelvmstart = VM_KERNEL_VM_BASE;
    840 
    841 	/*
    842 	 * msgbuf is allocated from the top of the last biggest memory block.
    843 	 */
    844 	paddr_t msgbufaddr = 0;
    845 
    846 #ifdef _LP64
    847 	/* XXX check all ranges for last one with a big enough hole */
    848 	msgbufaddr = memory_end - MSGBUFSIZE;
    849 	KASSERT(msgbufaddr != 0);	/* no space for msgbuf */
    850 	fdt_memory_remove_range(msgbufaddr, msgbufaddr + MSGBUFSIZE);
    851 	msgbufaddr = RISCV_PA_TO_KVA(msgbufaddr);
    852 	VPRINTF("msgbufaddr = %#lx\n", msgbufaddr);
    853 	initmsgbuf((void *)msgbufaddr, MSGBUFSIZE);
    854 #endif
    855 
    856 	KASSERT(msgbufaddr != 0);	/* no space for msgbuf */
    857 #ifdef _LP64
    858 	initmsgbuf((void *)RISCV_PA_TO_KVA(msgbufaddr), MSGBUFSIZE);
    859 #endif
    860 
    861 #define	DPRINTF(v)	VPRINTF("%24s = 0x%16lx\n", #v, (unsigned long)v);
    862 
    863 	VPRINTF("------------------------------------------\n");
    864 	DPRINTF(kern_vtopdiff);
    865 	DPRINTF(memory_start);
    866 	DPRINTF(memory_end);
    867 	DPRINTF(memory_size);
    868 	DPRINTF(kernstart_phys);
    869 	DPRINTF(kernend_phys)
    870 	DPRINTF(msgbufaddr);
    871 //	DPRINTF(physical_end);
    872 	DPRINTF(VM_MIN_KERNEL_ADDRESS);
    873 	DPRINTF(kernstart_mega);
    874 	DPRINTF(kernstart);
    875 	DPRINTF(kernend);
    876 	DPRINTF(kernend_mega);
    877 #if 0
    878 #ifdef MODULAR
    879 	DPRINTF(module_start);
    880 	DPRINTF(module_end);
    881 #endif
    882 #endif
    883 	DPRINTF(VM_MAX_KERNEL_ADDRESS);
    884 #ifdef _LP64
    885 	DPRINTF(pmap_direct_base);
    886 #endif
    887 	VPRINTF("------------------------------------------\n");
    888 
    889 #undef DPRINTF
    890 
    891 	uvm_md_init();
    892 
    893 	/*
    894 	 * pass memory pages to uvm
    895 	 */
    896 	physmem = 0;
    897 	fdt_memory_foreach(riscv_add_memory, NULL);
    898 
    899 	pmap_bootstrap(kernelvmstart, VM_MAX_KERNEL_ADDRESS);
    900 
    901 	kasan_init();
    902 
    903 	/* Finish setting up lwp0 on our end before we call main() */
    904 	riscv_init_lwp0_uarea();
    905 
    906 
    907 	error = 0;
    908 	if ((boothowto & RB_MD1) == 0) {
    909 		VPRINTF("mpstart\n");
    910 		if (plat->fp_mpstart)
    911 			error = plat->fp_mpstart();
    912 	}
    913 	if (error)
    914 		printf("AP startup problems\n");
    915 }
    916 
    917 
    918 #ifdef __HAVE_MM_MD_KERNACC
    919 
/*
 * IN_RANGE_P(addr, start, end) is true when start <= addr < end.
 * The expansion is fully parenthesised so that it can be negated or
 * combined with other operators safely.  Note that addr is evaluated
 * twice, so it must not have side effects.
 */
#define IN_RANGE_P(addr, start, end)	\
	((start) <= (addr) && (addr) < (end))
/*
 * IN_DIRECTMAP_P(va) is true when va lies in the direct map region;
 * without _LP64 it is always false.
 */
#ifdef _LP64
#define IN_DIRECTMAP_P(va) \
	IN_RANGE_P(va, RISCV_DIRECTMAP_START, RISCV_DIRECTMAP_END)
#else
#define IN_DIRECTMAP_P(va) false
#endif
    927 
    928 int
    929 mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled)
    930 {
    931 	extern char __kernel_text[];
    932 	extern char _end[];
    933 	extern char __data_start[];
    934 
    935 	const vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
    936 	const vaddr_t kernend = round_page((vaddr_t)_end);
    937 	const vaddr_t data_start = (vaddr_t)__data_start;
    938 
    939 	const vaddr_t va = (vaddr_t)ptr;
    940 
    941 	*handled = false;
    942 	if (IN_RANGE_P(va, kernstart, kernend)) {
    943 		*handled = true;
    944 		if (va < data_start && (prot & VM_PROT_WRITE) != 0) {
    945 			return EFAULT;
    946 		}
    947 	} else if (IN_DIRECTMAP_P(va)) {
    948 		*handled = true;
    949 	}
    950 
    951 	return 0;
    952 }
    953 #endif
    954 
    955 
    956 #ifdef _LP64
    957 static void
    958 pte_bits(void (*pr)(const char *, ...), pt_entry_t pte)
    959 {
    960 	(*pr)("%c%c%c%c%c%c%c%c",
    961 	    (pte & PTE_D) ? 'D' : '.',
    962 	    (pte & PTE_A) ? 'A' : '.',
    963 	    (pte & PTE_G) ? 'G' : '.',
    964 	    (pte & PTE_U) ? 'U' : '.',
    965 	    (pte & PTE_X) ? 'X' : '.',
    966 	    (pte & PTE_W) ? 'W' : '.',
    967 	    (pte & PTE_R) ? 'R' : '.',
    968 	    (pte & PTE_V) ? 'V' : '.');
    969 }
    970 
    971 static void
    972 dump_ln_table(paddr_t pdp_pa, int topbit, int level, vaddr_t va,
    973     void (*pr)(const char *, ...) __printflike(1, 2))
    974 {
    975 	pd_entry_t *pdp = (void *)PMAP_DIRECT_MAP(pdp_pa);
    976 
    977 	(*pr)("l%u     @  pa %#16" PRIxREGISTER "\n", level, pdp_pa);
    978 	for (size_t i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
    979 		pd_entry_t entry = pdp[i];
    980 
    981 		if (topbit) {
    982 			va = i << (PGSHIFT + level * SEGLENGTH);
    983 			if (va & __BIT(topbit)) {
    984 				va |= __BITS(63, topbit);
    985 			}
    986 		}
    987 		if (entry != 0) {
    988 			paddr_t pa = __SHIFTOUT(entry, PTE_PPN) << PGSHIFT;
    989 			// check level PPN bits.
    990 			if (PTE_ISLEAF_P(entry)) {
    991 				(*pr)("l%u %3zu    va 0x%016lx  pa 0x%012lx - ",
    992 				      level, i, va, pa);
    993 				pte_bits(pr, entry);
    994 				(*pr)("\n");
    995 			} else {
    996 				(*pr)("l%u %3zu    va 0x%016lx  -> 0x%012lx - ",
    997 				      level, i, va, pa);
    998 				pte_bits(pr, entry);
    999 				(*pr)("\n");
   1000 				if (level == 0) {
   1001 					(*pr)("wtf\n");
   1002 					continue;
   1003 				}
   1004 				if (pte_pde_valid_p(entry))
   1005 					dump_ln_table(pa, 0, level - 1, va, pr);
   1006 			}
   1007 		}
   1008 		va += 1UL << (PGSHIFT + level * SEGLENGTH);
   1009 	}
   1010 }
   1011 
   1012 void
   1013 pt_dump(void (*pr)(const char *, ...) __printflike(1, 2))
   1014 {
   1015 	const register_t satp = csr_satp_read();
   1016 	size_t topbit = sizeof(long) * NBBY - 1;
   1017 
   1018 #ifdef _LP64
   1019 	const paddr_t satp_pa = __SHIFTOUT(satp, SATP_PPN) << PGSHIFT;
   1020 	const uint8_t mode = __SHIFTOUT(satp, SATP_MODE);
   1021 	u_int level = 1;
   1022 
   1023 	switch (mode) {
   1024 	case SATP_MODE_SV39:
   1025 	case SATP_MODE_SV48:
   1026 		topbit = (39 - 1) + (mode - 8) * SEGLENGTH;
   1027 		level = mode - 6;
   1028 		break;
   1029 	}
   1030 #endif
   1031 	(*pr)("topbit = %zu\n", topbit);
   1032 
   1033 	(*pr)("satp   = 0x%" PRIxREGISTER "\n", satp);
   1034 #ifdef _LP64
   1035 	dump_ln_table(satp_pa, topbit, level, 0, pr);
   1036 #endif
   1037 }
   1038 #endif
   1039 
   1040 void
   1041 consinit(void)
   1042 {
   1043 	static bool initialized = false;
   1044 	const struct fdt_console *cons = fdtbus_get_console();
   1045 	const struct fdt_platform *plat = fdt_platform_find();
   1046 
   1047 	if (initialized || cons == NULL)
   1048 		return;
   1049 
   1050 	u_int uart_freq = 0;
   1051 	extern struct bus_space riscv_generic_bs_tag;
   1052 	struct fdt_attach_args faa = {
   1053 		.faa_bst = &riscv_generic_bs_tag,
   1054 	};
   1055 
   1056 	faa.faa_phandle = fdtbus_get_stdout_phandle();
   1057 	if (plat->fp_uart_freq != NULL)
   1058 		uart_freq = plat->fp_uart_freq();
   1059 
   1060 	cons->consinit(&faa, uart_freq);
   1061 
   1062 	initialized = true;
   1063 }
   1064