Home | History | Annotate | Line # | Download | only in riscv
riscv_machdep.c revision 1.26
      1 /*	$NetBSD: riscv_machdep.c,v 1.26 2023/05/07 12:41:49 skrll Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2014, 2019, 2022 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Matt Thomas of 3am Software Foundry, and by Nick Hudson.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include "opt_ddb.h"
     33 #include "opt_modular.h"
     34 #include "opt_riscv_debug.h"
     35 
     36 #include <sys/cdefs.h>
     37 __RCSID("$NetBSD: riscv_machdep.c,v 1.26 2023/05/07 12:41:49 skrll Exp $");
     38 
     39 #include <sys/param.h>
     40 
     41 #include <sys/asan.h>
     42 #include <sys/boot_flag.h>
     43 #include <sys/cpu.h>
     44 #include <sys/exec.h>
     45 #include <sys/kmem.h>
     46 #include <sys/ktrace.h>
     47 #include <sys/lwp.h>
     48 #include <sys/module.h>
     49 #include <sys/mount.h>
     50 #include <sys/msgbuf.h>
     51 #include <sys/optstr.h>
     52 #include <sys/proc.h>
     53 #include <sys/reboot.h>
     54 #include <sys/syscall.h>
     55 #include <sys/sysctl.h>
     56 #include <sys/systm.h>
     57 
     58 #include <dev/cons.h>
     59 #include <uvm/uvm_extern.h>
     60 
     61 #include <riscv/frame.h>
     62 #include <riscv/locore.h>
     63 #include <riscv/machdep.h>
     64 #include <riscv/pte.h>
     65 #include <riscv/sbi.h>
     66 
     67 #include <libfdt.h>
     68 #include <dev/fdt/fdtvar.h>
     69 #include <dev/fdt/fdt_boot.h>
     70 #include <dev/fdt/fdt_memory.h>
     71 #include <dev/fdt/fdt_private.h>
     72 
/*
 * Flag initialised to 1 and not read in this file.  The name suggests it
 * controls printing of fatal trap information -- confirm against the trap
 * handling code.
 */
int cpu_printfataltraps = 1;
/* Machine and machine-architecture names, taken from MACHINE / MACHINE_ARCH. */
char machine[] = MACHINE;
char machine_arch[] = MACHINE_ARCH;

/*
 * Verbose early-boot output: expands to printf() only when
 * VERBOSE_INIT_RISCV is defined, otherwise to nothing.
 */
#ifdef VERBOSE_INIT_RISCV
#define	VPRINTF(...)	printf(__VA_ARGS__)
#else
#define	VPRINTF(...)	__nothing
#endif

/* Size of the bootargs buffer below; may be overridden at build time. */
#ifndef FDT_MAX_BOOT_STRING
#define	FDT_MAX_BOOT_STRING 1024
#endif
/* 64 should be enough, even for a ZFS UUID */
#define	MAX_BOOT_DEV_STR	64

/* Filled from the /chosen "bootargs" property by init_riscv(). */
char bootargs[FDT_MAX_BOOT_STRING] = "";
/* Receives the "root" option value in parse_mi_bootargs(). */
char bootdevstr[MAX_BOOT_DEV_STR] = "";
/* Pointed at bootargs by init_riscv(). */
char *boot_args = NULL;

/*
 * Physical memory bounds as returned by fdt_memory_get() in init_riscv()
 * (recorded before any clamping done there).
 */
paddr_t physical_start;
paddr_t physical_end;
     95 
/*
 * Early console output hook: hand the character to uartputc().
 * The device number argument is not used.
 */
static void
earlyconsputc(dev_t dev, int c)
{
	uartputc(c);
}
    101 
/*
 * Early console input hook: return whatever uartgetc() returns.
 * The device number argument is not used.
 */
static int
earlyconsgetc(dev_t dev)
{
	return uartgetc();
}
    107 
/*
 * Minimal console built on the early UART hooks above.  init_riscv()
 * installs it as cn_tab so printf()/panic() work before consinit().
 */
static struct consdev earlycons = {
	.cn_putc = earlyconsputc,
	.cn_getc = earlyconsgetc,
	.cn_pollc = nullcnpollc,
};

/* Submap for physio; allocated in cpu_startup(). */
struct vm_map *phys_map;

/* Not referenced in this file; presumably the register frame used by DDB -- confirm. */
struct trapframe cpu_ddb_regs;
/*
 * Per-unit PCU operations table.  Only the FPU slot is populated, and
 * only when the kernel is built with FPE.
 */
const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
#ifdef FPE
	[PCU_FPU] = &pcu_fpu_ops,
#endif
};
    122 
/*
 * Used by PHYSTOV and VTOPHYS -- Will be set before BSS is zeroed so
 * keep it in data
 */
unsigned long kern_vtopdiff __attribute__((__section__(".data")));
    128 
    129 
/*
 * machine dependent system variables.
 *
 * Creates the permanent "machdep" sysctl node (CTL_MACHDEP); no leaf
 * variables are added here.
 */
SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_EOL);
}
    141 
    142 void
    143 delay(unsigned long us)
    144 {
    145 	const uint32_t cycles_per_us = curcpu()->ci_data.cpu_cc_freq / 1000000;
    146 	const uint64_t cycles = (uint64_t)us * cycles_per_us;
    147 	const uint64_t finish = csr_cycle_read() + cycles;
    148 
    149 	while (csr_cycle_read() < finish) {
    150 		/* spin, baby spin */
    151 	}
    152 }
    153 
#ifdef MODULAR
/*
 * Push any modules loaded by the boot loader.
 *
 * Currently an empty stub: the body does nothing.
 */
void
module_init_md(void)
{
}
#endif /* MODULAR */
    163 
    164 /*
    165  * Set registers on exec.
    166  * Clear all registers except sp, pc.
    167  * sp is set to the stack pointer passed in.  pc is set to the entry
    168  * point given by the exec_package passed in.
    169  */
    170 void
    171 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
    172 {
    173 	struct trapframe * const tf = l->l_md.md_utf;
    174 	struct proc * const p = l->l_proc;
    175 
    176 	memset(tf, 0, sizeof(*tf));
    177 	tf->tf_sp = (intptr_t)stack_align(stack);
    178 	tf->tf_pc = (intptr_t)pack->ep_entry & ~1;
    179 #ifdef _LP64
    180 	tf->tf_sr = (p->p_flag & PK_32) ? SR_USER32 : SR_USER64;
    181 #else
    182 	tf->tf_sr = SR_USER;
    183 #endif
    184 
    185 	// Set up arguments for ___start(cleanup, ps_strings)
    186 	tf->tf_a0 = 0;			// cleanup
    187 	tf->tf_a1 = p->p_psstrp;	// ps_strings
    188 
    189 	/*
    190 	 * Must have interrupts disabled for exception return.
    191 	 * Must be switching to user mode.
    192 	 * Must enable interrupts after sret.
    193 	 */
    194 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
    195 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
    196 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
    197 }
    198 
    199 void
    200 md_child_return(struct lwp *l)
    201 {
    202 	struct trapframe * const tf = lwp_trapframe(l);
    203 
    204 	tf->tf_a0 = 0;
    205 	tf->tf_a1 = 1;
    206 #ifdef FPE
    207 	/* Disable FP as we can't be using it (yet). */
    208 	tf->tf_sr &= ~SR_FS;
    209 #endif
    210 
    211 	/*
    212 	 * Must have interrupts disabled for exception return.
    213 	 * Must be switching to user mode.
    214 	 * Must enable interrupts after sret.
    215 	 */
    216 
    217 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
    218 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
    219 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
    220 
    221 	userret(l);
    222 }
    223 
/*
 * Return path for lwp `l' after a spawn: nothing machine-dependent is
 * adjusted here, the lwp simply goes through userret().
 */
void
cpu_spawn_return(struct lwp *l)
{
	userret(l);
}
    229 
    230 /*
    231  * Start a new LWP
    232  */
    233 void
    234 startlwp(void *arg)
    235 {
    236 	ucontext_t * const uc = arg;
    237 	lwp_t * const l = curlwp;
    238 	int error __diagused;
    239 
    240 	error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
    241 	KASSERT(error == 0);
    242 
    243 	kmem_free(uc, sizeof(*uc));
    244 	userret(l);
    245 }
    246 
// We've worked hard to make sure struct reg and __gregset_t are the same.
// Ditto for struct fpreg and fregset_t.
// cpu_getmcontext()/cpu_setmcontext() copy between these types by
// structure assignment through casts, so their sizes must agree.  The
// general-register check is only compiled on _LP64.

#ifdef _LP64
CTASSERT(sizeof(struct reg) == sizeof(__gregset_t));
#endif
CTASSERT(sizeof(struct fpreg) == sizeof(__fregset_t));
    254 
    255 void
    256 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
    257 {
    258 	const struct trapframe * const tf = l->l_md.md_utf;
    259 
    260 	/* Save register context. */
    261 	*(struct reg *)mcp->__gregs = tf->tf_regs;
    262 
    263 	*flags |= _UC_CPU | _UC_TLSBASE;
    264 
    265 	/* Save floating point register context, if any. */
    266 	KASSERT(l == curlwp);
    267 	if (fpu_valid_p(l)) {
    268 		/*
    269 		 * If this process is the current FP owner, dump its
    270 		 * context to the PCB first.
    271 		 */
    272 		fpu_save(l);
    273 
    274 		struct pcb * const pcb = lwp_getpcb(l);
    275 		*(struct fpreg *)mcp->__fregs = pcb->pcb_fpregs;
    276 		*flags |= _UC_FPU;
    277 	}
    278 }
    279 
    280 int
    281 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
    282 {
    283 	/*
    284 	 * Verify that at least the PC and SP are user addresses.
    285 	 */
    286 	if ((intptr_t) mcp->__gregs[_REG_PC] < 0
    287 	    || (intptr_t) mcp->__gregs[_REG_SP] < 0
    288 	    || (mcp->__gregs[_REG_PC] & 1))
    289 		return EINVAL;
    290 
    291 	return 0;
    292 }
    293 
    294 int
    295 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
    296 {
    297 	struct trapframe * const tf = l->l_md.md_utf;
    298 	struct proc * const p = l->l_proc;
    299 	const __greg_t * const gr = mcp->__gregs;
    300 	int error;
    301 
    302 	/* Restore register context, if any. */
    303 	if (flags & _UC_CPU) {
    304 		error = cpu_mcontext_validate(l, mcp);
    305 		if (error)
    306 			return error;
    307 
    308 		/* Save register context. */
    309 		tf->tf_regs = *(const struct reg *)gr;
    310 	}
    311 
    312 	/* Restore the private thread context */
    313 	if (flags & _UC_TLSBASE) {
    314 		lwp_setprivate(l, (void *)(intptr_t)mcp->__gregs[_X_TP]);
    315 	}
    316 
    317 	/* Restore floating point register context, if any. */
    318 	if (flags & _UC_FPU) {
    319 		KASSERT(l == curlwp);
    320 		/* Tell PCU we are replacing the FPU contents. */
    321 		fpu_replace(l);
    322 
    323 		/*
    324 		 * The PCB FP regs struct includes the FP CSR, so use the
    325 		 * proper size of fpreg when copying.
    326 		 */
    327 		struct pcb * const pcb = lwp_getpcb(l);
    328 		pcb->pcb_fpregs = *(const struct fpreg *)mcp->__fregs;
    329 	}
    330 
    331 	mutex_enter(p->p_lock);
    332 	if (flags & _UC_SETSTACK)
    333 		l->l_sigstk.ss_flags |= SS_ONSTACK;
    334 	if (flags & _UC_CLRSTACK)
    335 		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
    336 	mutex_exit(p->p_lock);
    337 
    338 	return 0;
    339 }
    340 
    341 void
    342 cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
    343 {
    344 	KASSERT(kpreempt_disabled());
    345 
    346 	if ((flags & RESCHED_KPREEMPT) != 0) {
    347 #ifdef __HAVE_PREEMPTION
    348 		if ((flags & RESCHED_REMOTE) != 0) {
    349 			cpu_send_ipi(ci, IPI_KPREEMPT);
    350 		} else {
    351 			softint_trigger(SOFTINT_KPREEMPT);
    352 		}
    353 #endif
    354 		return;
    355 	}
    356 	if ((flags & RESCHED_REMOTE) != 0) {
    357 #ifdef MULTIPROCESSOR
    358 		cpu_send_ipi(ci, IPI_AST);
    359 #endif
    360 	} else {
    361 		l->l_md.md_astpending = 1;	/* force call to ast() */
    362 	}
    363 }
    364 
    365 void
    366 cpu_signotify(struct lwp *l)
    367 {
    368 	KASSERT(kpreempt_disabled());
    369 #ifdef __HAVE_FAST_SOFTINTS
    370 	KASSERT(lwp_locked(l, NULL));
    371 #endif
    372 
    373 	if (l->l_cpu != curcpu()) {
    374 #ifdef MULTIPROCESSOR
    375 		cpu_send_ipi(ci, IPI_AST);
    376 #endif
    377 	} else {
    378 		l->l_md.md_astpending = 1; 	/* force call to ast() */
    379 	}
    380 }
    381 
    382 
/*
 * Note that lwp `l' owes a profiling tick: set LP_OWEUPC in its private
 * flags and mark an AST pending.
 *
 * Must be called with kernel preemption disabled, for an lwp on the
 * current CPU.
 */
void
cpu_need_proftick(struct lwp *l)
{
	KASSERT(kpreempt_disabled());
	KASSERT(l->l_cpu == curcpu());

	l->l_pflag |= LP_OWEUPC;
	l->l_md.md_astpending = 1;		/* force call to ast() */
}
    392 
    393 
    394 /* Sync the discs, unmount the filesystems, and adjust the todr */
    395 static void
    396 bootsync(void)
    397 {
    398 	static bool bootsyncdone = false;
    399 
    400 	if (bootsyncdone)
    401 		return;
    402 
    403 	bootsyncdone = true;
    404 
    405 	/* Make sure we can still manage to do things */
    406 	if ((csr_sstatus_read() & SR_SIE) == 0) {
    407 		/*
    408 		 * If we get here then boot has been called without RB_NOSYNC
    409 		 * and interrupts were disabled. This means the boot() call
    410 		 * did not come from a user process e.g. shutdown, but must
    411 		 * have come from somewhere in the kernel.
    412 		 */
    413 		ENABLE_INTERRUPTS();
    414 		printf("Warning interrupts disabled during boot()\n");
    415 	}
    416 
    417 	vfs_shutdown();
    418 
    419 	resettodr();
    420 }
    421 
    422 
    423 void
    424 cpu_reboot(int howto, char *bootstr)
    425 {
    426 
    427 	/*
    428 	 * If RB_NOSYNC was not specified sync the discs.
    429 	 * Note: Unless cold is set to 1 here, syslogd will die during the
    430 	 * unmount.  It looks like syslogd is getting woken up only to find
    431 	 * that it cannot page part of the binary in as the filesystem has
    432 	 * been unmounted.
    433 	 */
    434 	if ((howto & RB_NOSYNC) == 0)
    435 		bootsync();
    436 
    437 #if 0
    438 	/* Disable interrupts. */
    439 	const int s = splhigh();
    440 
    441 	/* Do a dump if requested. */
    442 	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
    443 		dumpsys();
    444 
    445 	splx(s);
    446 #endif
    447 
    448 	pmf_system_shutdown(boothowto);
    449 
    450 	/* Say NO to interrupts for good */
    451 	splhigh();
    452 
    453 	/* Run any shutdown hooks */
    454 	doshutdownhooks();
    455 
    456 	/* Make sure IRQ's are disabled */
    457 	DISABLE_INTERRUPTS();
    458 
    459 	sbi_system_reset(SBI_RESET_TYPE_COLDREBOOT, SBI_RESET_REASON_NONE);
    460 
    461 	for (;;) {
    462 		asm volatile("wfi" ::: "memory");
    463 	}
    464 	/* NOTREACHED */
    465 }
    466 
/*
 * Crash dump configuration hook.  Not implemented yet: the body is empty.
 */
void
cpu_dumpconf(void)
{
	// TBD!!
}
    472 
    473 
    474 int
    475 cpu_lwp_setprivate(lwp_t *l, void *addr)
    476 {
    477 	struct trapframe * const tf = lwp_trapframe(l);
    478 
    479 	tf->tf_reg[_REG_TP] = (register_t)addr;
    480 
    481 	return 0;
    482 }
    483 
    484 
    485 void
    486 cpu_startup(void)
    487 {
    488 	vaddr_t minaddr, maxaddr;
    489 	char pbuf[10];	/* "999999 MB" -- But Sv39 is max 512GB */
    490 
    491 	/*
    492 	 * Good {morning,afternoon,evening,night}.
    493 	 */
    494 	printf("%s%s", copyright, version);
    495 	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
    496 	printf("total memory = %s\n", pbuf);
    497 
    498 	minaddr = 0;
    499 	/*
    500 	 * Allocate a submap for physio.
    501 	 */
    502 	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
    503 	    VM_PHYS_SIZE, 0, FALSE, NULL);
    504 
    505 	format_bytes(pbuf, sizeof(pbuf), ptoa(uvm_availmem(false)));
    506 	printf("avail memory = %s\n", pbuf);
    507 
    508 	fdtbus_intr_init();
    509 }
    510 
    511 static void
    512 riscv_add_memory(const struct fdt_memory *m, void *arg)
    513 {
    514 	paddr_t first = atop(m->start);
    515 	paddr_t last = atop(m->end);
    516 	int freelist = VM_FREELIST_DEFAULT;
    517 
    518 	VPRINTF("adding %#16" PRIxPADDR " - %#16" PRIxPADDR"  to freelist %d\n",
    519 	    m->start, m->end, freelist);
    520 
    521 	uvm_page_physload(first, last, first, last, freelist);
    522 	physmem += last - first;
    523 }
    524 
    525 
    526 static void
    527 cpu_kernel_vm_init(paddr_t memory_start, paddr_t memory_end)
    528 {
    529 	extern char __kernel_text[];
    530 	extern char _end[];
    531 
    532 	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
    533 	vaddr_t kernend = round_page((vaddr_t)_end);
    534 	paddr_t kernstart_phys = KERN_VTOPHYS(kernstart);
    535 	paddr_t kernend_phys = KERN_VTOPHYS(kernend);
    536 
    537 	VPRINTF("%s: kernel phys start %#" PRIxPADDR " end %#" PRIxPADDR "\n",
    538 	    __func__, kernstart_phys, kernend_phys);
    539 	fdt_memory_remove_range(kernstart_phys,
    540 	    kernend_phys - kernstart_phys);
    541 
    542 	/*
    543 	 * Don't give these pages to UVM.
    544 	 *
    545 	 * cpu_kernel_vm_init need to create proper tables then the following
    546 	 * will be true.
    547 	 *
    548 	 * Now we have APs started the pages used for stacks and L1PT can
    549 	 * be given to uvm
    550 	 */
    551 	extern char const __start__init_memory[];
    552 	extern char const __stop__init_memory[] __weak;
    553 	if (__start__init_memory != __stop__init_memory) {
    554 		const paddr_t spa = KERN_VTOPHYS((vaddr_t)__start__init_memory);
    555 		const paddr_t epa = KERN_VTOPHYS((vaddr_t)__stop__init_memory);
    556 
    557 		VPRINTF("%s: init   phys start %#" PRIxPADDR
    558 		    " end %#" PRIxPADDR "\n", __func__, spa, epa);
    559 		fdt_memory_remove_range(spa, epa - spa);
    560 	}
    561 
    562 #ifdef _LP64
    563 	paddr_t pa = memory_start & ~XSEGOFSET;
    564 	pmap_direct_base = RISCV_DIRECTMAP_START;
    565 	extern pd_entry_t l2_pte[PAGE_SIZE / sizeof(pd_entry_t)];
    566 
    567 
    568 	const vsize_t vshift = XSEGSHIFT;
    569 	const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
    570 	const vsize_t inc = 1UL << vshift;
    571 
    572 	const vaddr_t sva = RISCV_DIRECTMAP_START + pa;
    573 	const vaddr_t eva = RISCV_DIRECTMAP_END;
    574 	const size_t sidx = (sva >> vshift) & pdetab_mask;
    575 	const size_t eidx = (eva >> vshift) & pdetab_mask;
    576 
    577 	/* Allocate gigapages covering all physical memory in the direct map. */
    578 	for (size_t i = sidx; i < eidx && pa < memory_end; i++, pa += inc) {
    579 		l2_pte[i] = PA_TO_PTE(pa) | PTE_KERN | PTE_HARDWIRED | PTE_RW;
    580 		VPRINTF("dm:   %p :  %#" PRIxPADDR "\n", &l2_pte[i], l2_pte[i]);
    581 	}
    582 #endif
    583 //	pt_dump(printf);
    584 }
    585 
    586 static void
    587 riscv_init_lwp0_uarea(void)
    588 {
    589 	extern char lwp0uspace[];
    590 
    591 	uvm_lwp_setuarea(&lwp0, (vaddr_t)lwp0uspace);
    592 	memset(&lwp0.l_md, 0, sizeof(lwp0.l_md));
    593 	memset(lwp_getpcb(&lwp0), 0, sizeof(struct pcb));
    594 
    595 	struct trapframe *tf = (struct trapframe *)(lwp0uspace + USPACE) - 1;
    596 	memset(tf, 0, sizeof(*tf));
    597 
    598 	lwp0.l_md.md_utf = lwp0.l_md.md_ktf = tf;
    599 }
    600 
    601 
/*
 * fdt_memory_foreach() callback: with verbose init output enabled, print
 * the start address and size of one FDT memory range.  `arg' is unused.
 */
static void
riscv_print_memory(const struct fdt_memory *m, void *arg)
{

	VPRINTF("FDT /memory @ 0x%" PRIx64 " size 0x%" PRIx64 "\n",
	    m->start, m->end - m->start);
}
    609 
    610 
    611 static void
    612 parse_mi_bootargs(char *args)
    613 {
    614 	int howto;
    615 	bool found, start, skipping;
    616 
    617 	if (args == NULL)
    618 		return;
    619 
    620 	start = true;
    621 	skipping = false;
    622 	for (char *cp = args; *cp; cp++) {
    623 		/* check for "words" starting with a "-" only */
    624 		if (start) {
    625 			if (*cp == '-') {
    626 				skipping = false;
    627 			} else {
    628 				skipping = true;
    629 			}
    630 			start = false;
    631 			continue;
    632 		}
    633 
    634 		if (*cp == ' ') {
    635 			start = true;
    636 			skipping = false;
    637 			continue;
    638 		}
    639 
    640 		if (skipping) {
    641 			continue;
    642 		}
    643 
    644 		/* Check valid boot flags */
    645 		howto = 0;
    646 		BOOT_FLAG(*cp, howto);
    647 		if (!howto)
    648 			printf("bootflag '%c' not recognised\n", *cp);
    649 		else
    650 			boothowto |= howto;
    651 	}
    652 
    653 	found = optstr_get(args, "root", bootdevstr, sizeof(bootdevstr));
    654 	if (found) {
    655 		bootspec = bootdevstr;
    656 	}
    657 }
    658 
    659 
    660 void
    661 init_riscv(register_t hartid, paddr_t dtb)
    662 {
    663 
    664 	/* set temporally to work printf()/panic() even before consinit() */
    665 	cn_tab = &earlycons;
    666 
    667 	/* Load FDT */
    668 	const vaddr_t dtbva = VM_KERNEL_DTB_BASE + (dtb & (NBSEG - 1));
    669 	void *fdt_data = (void *)dtbva;
    670 	int error = fdt_check_header(fdt_data);
    671 	if (error != 0)
    672 	    panic("fdt_check_header failed: %s", fdt_strerror(error));
    673 
    674 	fdtbus_init(fdt_data);
    675 
    676 	/* Lookup platform specific backend */
    677 	const struct fdt_platform *plat = fdt_platform_find();
    678 	if (plat == NULL)
    679 		panic("Kernel does not support this device");
    680 
    681 	/* Early console may be available, announce ourselves. */
    682 	VPRINTF("FDT<%p>\n", fdt_data);
    683 
    684 	const int chosen = OF_finddevice("/chosen");
    685 	if (chosen >= 0)
    686 		OF_getprop(chosen, "bootargs", bootargs, sizeof(bootargs));
    687 	boot_args = bootargs;
    688 
    689 	VPRINTF("devmap %p\n", plat->fp_devmap());
    690 	pmap_devmap_bootstrap(0, plat->fp_devmap());
    691 
    692 	VPRINTF("bootstrap\n");
    693 	plat->fp_bootstrap();
    694 
    695 	/*
    696 	 * If stdout-path is specified on the command line, override the
    697 	 * value in /chosen/stdout-path before initializing console.
    698 	 */
    699 	VPRINTF("stdout\n");
    700 	fdt_update_stdout_path(fdt_data, boot_args);
    701 
    702 	/*
    703 	 * Done making changes to the FDT.
    704 	 */
    705 	fdt_pack(fdt_data);
    706 
    707 	const uint32_t dtbsize = round_page(fdt_totalsize(fdt_data));
    708 
    709 	VPRINTF("fdt size %x/%x\n", dtbsize, fdt_totalsize(fdt_data));
    710 
    711 	VPRINTF("consinit ");
    712 	consinit();
    713 	VPRINTF("ok\n");
    714 
    715 	/* Talk to the user */
    716 	printf("NetBSD/riscv (fdt) booting ...\n");
    717 
    718 #ifdef BOOT_ARGS
    719 	char mi_bootargs[] = BOOT_ARGS;
    720 	parse_mi_bootargs(mi_bootargs);
    721 #endif
    722 
    723 	uint64_t memory_start, memory_end;
    724 	fdt_memory_get(&memory_start, &memory_end);
    725 	physical_start = memory_start;
    726 	physical_end = memory_end;
    727 
    728 	fdt_memory_foreach(riscv_print_memory, NULL);
    729 
    730 	/* Cannot map memory above largest page number */
    731 	const uint64_t maxppn = __SHIFTOUT_MASK(PTE_PPN) - 1;
    732 	const uint64_t memory_limit = ptoa(maxppn);
    733 
    734 	if (memory_end > memory_limit) {
    735 		fdt_memory_remove_range(memory_limit, memory_end);
    736 		memory_end = memory_limit;
    737 	}
    738 
    739 	uint64_t memory_size __unused = memory_end - memory_start;
    740 
    741 	VPRINTF("%s: memory start %" PRIx64 " end %" PRIx64 " (len %"
    742 	    PRIx64 ")\n", __func__, memory_start, memory_end, memory_size);
    743 
    744 	fdt_memory_remove_reserved(memory_start, memory_end);
    745 
    746 	fdt_memory_remove_range(dtb, dtb + dtbsize);
    747 
    748 	/* Perform PT build and VM init */
    749 	cpu_kernel_vm_init(memory_start, memory_end);
    750 
    751 	VPRINTF("bootargs: %s\n", bootargs);
    752 
    753 	parse_mi_bootargs(boot_args);
    754 
    755 #ifdef DDB
    756 	if (boothowto & RB_KDB) {
    757 		printf("Entering DDB...\n");
    758 		cpu_Debugger();
    759 	}
    760 #endif
    761 
    762 	extern char __kernel_text[];
    763 	extern char _end[];
    764 //	extern char __data_start[];
    765 //	extern char __rodata_start[];
    766 
    767 	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
    768 	vaddr_t kernend = round_page((vaddr_t)_end);
    769 	paddr_t kernstart_phys __unused = KERN_VTOPHYS(kernstart);
    770 	paddr_t kernend_phys __unused = KERN_VTOPHYS(kernend);
    771 
    772 	vaddr_t kernelvmstart;
    773 
    774 	vaddr_t kernstart_mega __unused = MEGAPAGE_TRUNC(kernstart);
    775 	vaddr_t kernend_mega = MEGAPAGE_ROUND(kernend);
    776 
    777 	kernelvmstart = kernend_mega;
    778 
    779 #if 0
    780 #ifdef MODULAR
    781 #define MODULE_RESERVED_MAX	(1024 * 1024 * 128)
    782 #define MODULE_RESERVED_SIZE	(1024 * 1024 * 32)	/* good enough? */
    783 	module_start = kernelvmstart;
    784 	module_end = kernend_mega + MODULE_RESERVED_SIZE;
    785 	if (module_end >= kernstart_mega + MODULE_RESERVED_MAX)
    786 		module_end = kernstart_mega + MODULE_RESERVED_MAX;
    787 	KASSERT(module_end > kernend_mega);
    788 	kernelvmstart = module_end;
    789 #endif /* MODULAR */
    790 #endif
    791 	KASSERT(kernelvmstart < VM_KERNEL_VM_BASE);
    792 
    793 	kernelvmstart = VM_KERNEL_VM_BASE;
    794 
    795 	/*
    796 	 * msgbuf is allocated from the top of the last biggest memory block.
    797 	 */
    798 	paddr_t msgbufaddr = 0;
    799 
    800 #ifdef _LP64
    801 	/* XXX check all ranges for last one with a big enough hole */
    802 	msgbufaddr = memory_end - MSGBUFSIZE;
    803 	KASSERT(msgbufaddr != 0);	/* no space for msgbuf */
    804 	fdt_memory_remove_range(msgbufaddr, msgbufaddr + MSGBUFSIZE);
    805 	msgbufaddr = RISCV_PA_TO_KVA(msgbufaddr);
    806 	VPRINTF("msgbufaddr = %#lx\n", msgbufaddr);
    807 	initmsgbuf((void *)msgbufaddr, MSGBUFSIZE);
    808 #endif
    809 
    810 	KASSERT(msgbufaddr != 0);	/* no space for msgbuf */
    811 #ifdef _LP64
    812 	initmsgbuf((void *)RISCV_PA_TO_KVA(msgbufaddr), MSGBUFSIZE);
    813 #endif
    814 
    815 #define	DPRINTF(v)	VPRINTF("%24s = 0x%16lx\n", #v, (unsigned long)v);
    816 
    817 	VPRINTF("------------------------------------------\n");
    818 	DPRINTF(kern_vtopdiff);
    819 	DPRINTF(memory_start);
    820 	DPRINTF(memory_end);
    821 	DPRINTF(memory_size);
    822 	DPRINTF(kernstart_phys);
    823 	DPRINTF(kernend_phys)
    824 	DPRINTF(msgbufaddr);
    825 //	DPRINTF(physical_end);
    826 	DPRINTF(VM_MIN_KERNEL_ADDRESS);
    827 	DPRINTF(kernstart_mega);
    828 	DPRINTF(kernstart);
    829 	DPRINTF(kernend);
    830 	DPRINTF(kernend_mega);
    831 #if 0
    832 #ifdef MODULAR
    833 	DPRINTF(module_start);
    834 	DPRINTF(module_end);
    835 #endif
    836 #endif
    837 	DPRINTF(VM_MAX_KERNEL_ADDRESS);
    838 #ifdef _LP64
    839 	DPRINTF(pmap_direct_base);
    840 #endif
    841 	VPRINTF("------------------------------------------\n");
    842 
    843 #undef DPRINTF
    844 
    845 	uvm_md_init();
    846 
    847 	/*
    848 	 * pass memory pages to uvm
    849 	 */
    850 	physmem = 0;
    851 	fdt_memory_foreach(riscv_add_memory, NULL);
    852 
    853 	pmap_bootstrap(kernelvmstart, VM_MAX_KERNEL_ADDRESS);
    854 
    855 	kasan_init();
    856 
    857 	/* Finish setting up lwp0 on our end before we call main() */
    858 	riscv_init_lwp0_uarea();
    859 }
    860 
    861 
    862 #ifdef _LP64
    863 static void
    864 pte_bits(void (*pr)(const char *, ...), pt_entry_t pte)
    865 {
    866 	(*pr)("%c%c%c%c%c%c%c%c",
    867 	    (pte & PTE_D) ? 'D' : '.',
    868 	    (pte & PTE_A) ? 'A' : '.',
    869 	    (pte & PTE_G) ? 'G' : '.',
    870 	    (pte & PTE_U) ? 'U' : '.',
    871 	    (pte & PTE_X) ? 'X' : '.',
    872 	    (pte & PTE_W) ? 'W' : '.',
    873 	    (pte & PTE_R) ? 'R' : '.',
    874 	    (pte & PTE_V) ? 'V' : '.');
    875 }
    876 
    877 static void
    878 dump_ln_table(paddr_t pdp_pa, int topbit, int level, vaddr_t va,
    879     void (*pr)(const char *, ...) __printflike(1, 2))
    880 {
    881 	pd_entry_t *pdp = (void *)PMAP_DIRECT_MAP(pdp_pa);
    882 
    883 	(*pr)("l%u     @  pa %#16" PRIxREGISTER "\n", level, pdp_pa);
    884 	for (size_t i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
    885 		pd_entry_t entry = pdp[i];
    886 
    887 		if (topbit) {
    888 			va = i << (PGSHIFT + level * SEGLENGTH);
    889 			if (va & __BIT(topbit)) {
    890 				va |= __BITS(63, topbit);
    891 			}
    892 		}
    893 		if (entry != 0) {
    894 			paddr_t pa = __SHIFTOUT(entry, PTE_PPN) << PGSHIFT;
    895 			// check level PPN bits.
    896 			if (PTE_ISLEAF_P(entry)) {
    897 				(*pr)("l%u %3zu    va 0x%016lx  pa 0x%012lx - ",
    898 				      level, i, va, pa);
    899 				pte_bits(pr, entry);
    900 				(*pr)("\n");
    901 			} else {
    902 				(*pr)("l%u %3zu    va 0x%016lx  -> 0x%012lx - ",
    903 				      level, i, va, pa);
    904 				pte_bits(pr, entry);
    905 				(*pr)("\n");
    906 				if (level == 0) {
    907 					(*pr)("wtf\n");
    908 					continue;
    909 				}
    910 				if (pte_pde_valid_p(entry))
    911 					dump_ln_table(pa, 0, level - 1, va, pr);
    912 			}
    913 		}
    914 		va += 1UL << (PGSHIFT + level * SEGLENGTH);
    915 	}
    916 }
    917 
    918 #endif
    919 
    920 void
    921 pt_dump(void (*pr)(const char *, ...) __printflike(1, 2))
    922 {
    923 	const register_t satp = csr_satp_read();
    924 	size_t topbit = sizeof(long) * NBBY - 1;
    925 
    926 #ifdef _LP64
    927 	const paddr_t satp_pa = __SHIFTOUT(satp, SATP_PPN) << PGSHIFT;
    928 	const uint8_t mode = __SHIFTOUT(satp, SATP_MODE);
    929 	u_int level = 1;
    930 
    931 	switch (mode) {
    932 	case SATP_MODE_SV39:
    933 	case SATP_MODE_SV48:
    934 		topbit = (39 - 1) + (mode - 8) * SEGLENGTH;
    935 		level = mode - 6;
    936 		break;
    937 	}
    938 #endif
    939 	(*pr)("topbit = %zu\n", topbit);
    940 
    941 	(*pr)("satp   = 0x%" PRIxREGISTER "\n", satp);
    942 #ifdef _LP64
    943 	dump_ln_table(satp_pa, topbit, level, 0, pr);
    944 #endif
    945 }
    946 
    947 void
    948 consinit(void)
    949 {
    950 	static bool initialized = false;
    951 	const struct fdt_console *cons = fdtbus_get_console();
    952 	const struct fdt_platform *plat = fdt_platform_find();
    953 
    954 	if (initialized || cons == NULL)
    955 		return;
    956 
    957 	u_int uart_freq = 0;
    958 	extern struct bus_space riscv_generic_bs_tag;
    959 	struct fdt_attach_args faa = {
    960 		.faa_bst = &riscv_generic_bs_tag,
    961 	};
    962 
    963 	faa.faa_phandle = fdtbus_get_stdout_phandle();
    964 	if (plat->fp_uart_freq != NULL)
    965 		uart_freq = plat->fp_uart_freq();
    966 
    967 	cons->consinit(&faa, uart_freq);
    968 
    969 	initialized = true;
    970 }
    971