/*	$NetBSD: pmap_machdep.c,v 1.23 2025/10/12 19:44:04 skrll Exp $	*/

/*
 * Copyright (c) 2014, 2019, 2021 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas (of 3am Software Foundry), Maxime Villard, and
 * Nick Hudson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_riscv_debug.h"
#include "opt_multiprocessor.h"

#define	__PMAP_PRIVATE

#include <sys/cdefs.h>
__RCSID("$NetBSD: pmap_machdep.c,v 1.23 2025/10/12 19:44:04 skrll Exp $");

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>

#include <riscv/machdep.h>
#include <riscv/sbi.h>
#include <riscv/sysreg.h>

#ifdef VERBOSE_INIT_RISCV
#define	VPRINTF(...)	printf(__VA_ARGS__)
#else
#define	VPRINTF(...)	__nothing
#endif
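
/*
 * pmap_direct_base/pmap_direct_end delimit the kernel's direct map of
 * physical memory.  The pmap_pte_* variables below hold the page-based
 * memory-type (PBMT) attribute bits selected by pmap_probe_pbmt()
 * (currently only T-Head XMAE is recognized) and applied to new
 * mappings via pte_enter_flags_to_pbmt(); they remain zero on CPUs
 * without such an extension.
 */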
vaddr_t pmap_direct_base __read_mostly;
vaddr_t pmap_direct_end __read_mostly;

#ifdef _LP64
static pt_entry_t pmap_pte_pbmt_mask __read_mostly;
static pt_entry_t pmap_pte_pma __read_mostly;
static pt_entry_t pmap_pte_nc __read_mostly;
static pt_entry_t pmap_pte_io __read_mostly;
#endif

void
pmap_zero_page(paddr_t pa)
{
#ifdef _LP64
#ifdef PMAP_DIRECT_MAP
	memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE);
#else
#error "no direct map"
#endif
#else
	KASSERT(false);
#endif
}

void
pmap_copy_page(paddr_t src, paddr_t dst)
{
#ifdef _LP64
#ifdef PMAP_DIRECT_MAP
	memcpy((void *)PMAP_DIRECT_MAP(dst), (const void *)PMAP_DIRECT_MAP(src),
	    PAGE_SIZE);
#else
#error "no direct map"
#endif
#else
	KASSERT(false);
#endif
}

#ifdef _LP64
pt_entry_t
pte_enter_flags_to_pbmt(int flags)
{
	if (flags & PMAP_DEV) {
		return pmap_pte_io;
	} else if (flags & PMAP_NOCACHE) {
		return pmap_pte_nc;
	}
	return pmap_pte_pma;
}
#endif

struct vm_page *
pmap_md_alloc_poolpage(int flags)
{

	return uvm_pagealloc(NULL, 0, NULL, flags);
}

vaddr_t
pmap_md_map_poolpage(paddr_t pa, vsize_t len)
{
#ifdef _LP64
	return PMAP_DIRECT_MAP(pa);
#else
	panic("not supported");
#endif
}

void
pmap_md_unmap_poolpage(vaddr_t pa, vsize_t len)
{
	/* nothing to do */
}


bool
pmap_md_direct_mapped_vaddr_p(vaddr_t va)
{
#ifdef _LP64
	return RISCV_DIRECTMAP_P(va);
#else
	return false;
#endif
}

bool
pmap_md_io_vaddr_p(vaddr_t va)
{
	return false;
}

paddr_t
pmap_md_direct_mapped_vaddr_to_paddr(vaddr_t va)
{
#ifdef _LP64
#ifdef PMAP_DIRECT_MAP
	return PMAP_DIRECT_UNMAP(va);
#else
	KASSERT(false);
	return 0;
#endif
#else
	KASSERT(false);
	return 0;
#endif
}

vaddr_t
pmap_md_direct_map_paddr(paddr_t pa)
{
#ifdef _LP64
	return PMAP_DIRECT_MAP(pa);
#else
	panic("not supported");
#endif
}

void
pmap_md_init(void)
{
	pmap_tlb_info_evcnt_attach(&pmap_tlb0_info);
}

bool
pmap_md_ok_to_steal_p(const uvm_physseg_t bank, size_t npgs)
{
	return true;
}

#ifdef MULTIPROCESSOR
void
pmap_md_tlb_info_attach(struct pmap_tlb_info *ti, struct cpu_info *ci)
{
}
#endif
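
/*
 * Activate the given pmap's translation context on the current CPU by
 * loading its page-table root and ASID into the satp CSR.
 *
 * Per the RISC-V privileged spec, RV64 satp is MODE[63:60] |
 * ASID[59:44] | PPN[43:0] (RV32: MODE[31] | ASID[30:22] | PPN[21:0]);
 * the __SHIFTIN()s below assemble exactly that value from the pmap's
 * root-page PPN and its per-TLB ASID.  When the TLB info is not using
 * ASIDs, the whole TLB is invalidated on activation for an lwp, since
 * stale translations can no longer be told apart by ASID.
 */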
void
pmap_md_xtab_activate(struct pmap *pmap, struct lwp *l)
{
//	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	struct cpu_info * const ci = curcpu();
	struct pmap_tlb_info * const ti = cpu_tlb_info(ci);
	struct pmap_asid_info * const pai = PMAP_PAI(pmap, ti);

	uint64_t satp =
#ifdef _LP64
	    __SHIFTIN(SATP_MODE_SV39, SATP_MODE) |
#else
	    __SHIFTIN(SATP_MODE_SV32, SATP_MODE) |
#endif
	    __SHIFTIN(pai->pai_asid, SATP_ASID) |
	    __SHIFTIN(pmap->pm_md.md_ppn, SATP_PPN);

	csr_satp_write(satp);

	if (l && !tlbinfo_asids_p(ti)) {
		tlb_invalidate_all();
	}
}

void
pmap_md_xtab_deactivate(struct pmap *pmap)
{

	/* switch to kernel pmap */
	pmap_md_xtab_activate(pmap_kernel(), NULL);
}

void
pmap_md_pdetab_init(struct pmap *pmap)
{
	KASSERT(pmap != NULL);

	const vaddr_t pdetabva = (vaddr_t)pmap->pm_pdetab;
	const paddr_t pdetabpa = pmap_md_direct_mapped_vaddr_to_paddr(pdetabva);
	pmap->pm_md.md_ppn = pdetabpa >> PAGE_SHIFT;

	/* XXXSB can we "pre-optimise" this by keeping a list of pdes to copy? */
	/* XXXSB for relatively normal size memory (8gb) we only need 10-20ish ptes? */
	/* XXXSB most (all?) of these ptes are in two consecutive ranges. */
	for (size_t i = NPDEPG / 2; i < NPDEPG; ++i) {
		/*
		 * XXXSB where/when do new entries in pmap_kernel()->pm_pdetab
		 * XXXSB get added to existing pmaps?
		 *
		 * pmap_growkernel doesn't have support for fixing up existing
		 * pmaps. (yet)
		 *
		 * Various options:
		 *
		 * - do the x86 thing. maintain a list of pmaps and update them
		 *   all in pmap_growkernel.
		 * - make sure the top level entries are populated and then simply
		 *   copy "them all" here. If pmap_growkernel runs the new entries
		 *   will become visible to all pmaps.
		 * - ...
		 */

		/* XXXSB is this any faster than blindly copying all "high" entries? */
		pd_entry_t pde = pmap_kernel()->pm_pdetab->pde_pde[i];

		/* we might have leaf entries (direct map) as well as non-leaf */
		if (pde) {
			pmap->pm_pdetab->pde_pde[i] = pde;
		}
	}
}

void
pmap_md_pdetab_fini(struct pmap *pmap)
{

	if (pmap == pmap_kernel())
		return;
	for (size_t i = NPDEPG / 2; i < NPDEPG; ++i) {
		KASSERT(pte_invalid_pde() == 0);
		pmap->pm_pdetab->pde_pde[i] = 0;
	}
}

static void
pmap_md_grow(pmap_pdetab_t *ptb, vaddr_t va, vsize_t vshift,
    vsize_t *remaining)
{
	KASSERT((va & (NBSEG - 1)) == 0);
#ifdef _LP64
	const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
	const vsize_t vinc = 1UL << vshift;

	for (size_t i = (va >> vshift) & pdetab_mask;
	    i < PMAP_PDETABSIZE; i++, va += vinc) {
		pd_entry_t * const pde_p =
		    &ptb->pde_pde[(va >> vshift) & pdetab_mask];

		vaddr_t pdeva;
		if (pte_pde_valid_p(*pde_p)) {
			const paddr_t pa = pte_pde_to_paddr(*pde_p);
			pdeva = pmap_md_direct_map_paddr(pa);
		} else {
			/*
			 * uvm_pageboot_alloc() returns a direct mapped address
			 */
			pdeva = uvm_pageboot_alloc(PAGE_SIZE);
			paddr_t pdepa = RISCV_KVA_TO_PA(pdeva);
			*pde_p = pte_pde_pdetab(pdepa, true);
			memset((void *)pdeva, 0, PAGE_SIZE);
		}

		if (vshift > SEGSHIFT) {
			pmap_md_grow((pmap_pdetab_t *)pdeva, va,
			    vshift - SEGLENGTH, remaining);
		} else {
			if (*remaining > vinc)
				*remaining -= vinc;
			else
				*remaining = 0;
		}
		if (*remaining == 0)
			return;
	}
#endif
}

void
pmap_probe_pbmt(void)
{
#ifdef _LP64
	const register_t mvendorid = sbi_get_mvendorid().value;

	switch (mvendorid) {
	case CPU_VENDOR_THEAD:
		if (csr_thead_sxstatus_read() & TX_SXSTATUS_MAEE) {
			VPRINTF("T-Head XMAE detected.\n");
			pmap_pte_pbmt_mask = PTE_XMAE;
			pmap_pte_pma = PTE_XMAE_PMA;
			pmap_pte_nc = PTE_XMAE_NC;
			pmap_pte_io = PTE_XMAE_IO;
		}
		break;

	default:
		break;
	}

	/*
	 * No fixups of the initial MMU tables.  We have to assume
	 * that those were set up correctly in locore.S.  The variables
	 * set above are for new mappings created now that the kernel
	 * is up and running.
	 */
#endif
}
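
/*
 * Bootstrap the kernel pmap.  Roughly: adopt the bootstrap page table
 * built in init_riscv(), initialize the TLB/ASID state, estimate how
 * much KVA the kernel will need, pre-grow the kernel page-table
 * hierarchy to cover that range, and set up the pmap and pv-entry
 * pools.
 */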
void
pmap_bootstrap(vaddr_t vstart, vaddr_t vend)
{
	extern pmap_pdetab_t bootstrap_pde[PAGE_SIZE / sizeof(pd_entry_t)];

	// pmap_pdetab_t * const kptb = &pmap_kern_pdetab;
	pmap_t pm = pmap_kernel();

	VPRINTF("common ");
	pmap_bootstrap_common();

#ifdef MULTIPROCESSOR
	VPRINTF("cpusets ");
	struct cpu_info * const ci = curcpu();
	kcpuset_create(&ci->ci_shootdowncpus, true);
#endif

	VPRINTF("bs_pde %p ", bootstrap_pde);

	// kend = (kend + 0x200000 - 1) & -0x200000;

	/* Use the tables we already built in init_riscv() */
	pm->pm_pdetab = bootstrap_pde;

	/* Get the PPN for our page table root */
	pm->pm_md.md_ppn = atop(KERN_VTOPHYS((vaddr_t)bootstrap_pde));

	/* Setup basic info like pagesize=PAGE_SIZE */
	// uvm_md_init();

	/* init the lock */
	// XXXNH per cpu?
	pmap_tlb_info_init(&pmap_tlb0_info);

	VPRINTF("ASID max %x ", pmap_tlb0_info.ti_asid_max);

#ifdef MULTIPROCESSOR
	VPRINTF("kcpusets ");

	kcpuset_create(&pm->pm_onproc, true);
	kcpuset_create(&pm->pm_active, true);
	KASSERT(pm->pm_onproc != NULL);
	KASSERT(pm->pm_active != NULL);

	kcpuset_set(pm->pm_onproc, cpu_index(ci));
	kcpuset_set(pm->pm_active, cpu_index(ci));
#endif

	VPRINTF("nkmempages ");
	/*
	 * Compute the number of pages kmem_arena will have.  This will also
	 * be called by uvm_km_bootstrap later, but that doesn't matter
	 */
	kmeminit_nkmempages();

	/* Get size of buffer cache and set an upper limit */
	buf_setvalimit((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 8);
	vsize_t bufsz = buf_memcalc();
	buf_setvalimit(bufsz);

	vsize_t kvmsize = (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
	    bufsz + 16 * NCARGS + pager_map_size) +
	    /*(maxproc * UPAGES) + */nkmempages * NBPG;

#ifdef SYSVSHM
	kvmsize += shminfo.shmall;
#endif

	/* Calculate VA address space and roundup to NBSEG tables */
	kvmsize = roundup(kvmsize, NBSEG);


	/*
	 * Initialize `FYI' variables.  Note we're relying on
	 * the fact that BSEARCH sorts the vm_physmem[] array
	 * for us.  Must do this before uvm_pageboot_alloc()
	 * can be called.
	 */
	pmap_limits.avail_start = ptoa(uvm_physseg_get_start(uvm_physseg_get_first()));
	pmap_limits.avail_end = ptoa(uvm_physseg_get_end(uvm_physseg_get_last()));

	/*
	 * Update the naive settings in pmap_limits to the actual KVA range.
	 */
	pmap_limits.virtual_start = vstart;
	pmap_limits.virtual_end = vend;

	VPRINTF("limits: %" PRIxVADDR " - %" PRIxVADDR "\n", vstart, vend);

	const vaddr_t kvmstart = vstart;
	pmap_curmaxkvaddr = vstart + kvmsize;

	VPRINTF("kva : %" PRIxVADDR " - %" PRIxVADDR "\n", kvmstart,
	    pmap_curmaxkvaddr);
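
	/*
	 * Pre-populate the intermediate levels of the kernel page table
	 * for the estimated KVA range [kvmstart, kvmstart + kvmsize):
	 * pmap_md_grow() walks down from the top level, allocating and
	 * zeroing page-table pages with uvm_pageboot_alloc() until the
	 * requested size is covered.
	 */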
	pmap_md_grow(pmap_kernel()->pm_pdetab, kvmstart, XSEGSHIFT, &kvmsize);

	/*
	 * Initialize the pools.
	 */
	pool_init(&pmap_pmap_pool, PMAP_SIZE, 0, 0, 0, "pmappl",
	    &pool_allocator_nointr, IPL_NONE);

	pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl",
#ifdef KASAN
	    NULL,
#else
	    &pmap_pv_page_allocator,
#endif
	    IPL_NONE);

	// riscv_dcache_align
	pmap_pvlist_lock_init(CACHE_LINE_SIZE);
}


vsize_t
pmap_kenter_range(vaddr_t va, paddr_t pa, vsize_t size,
    vm_prot_t prot, u_int flags)
{
	extern pd_entry_t l1_pte[PAGE_SIZE / sizeof(pd_entry_t)];

	vaddr_t sva = MEGAPAGE_TRUNC(va);
	paddr_t spa = MEGAPAGE_TRUNC(pa);
	const vaddr_t eva = MEGAPAGE_ROUND(va + size);
	const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
	const vsize_t vshift = SEGSHIFT;
	const pt_entry_t pbmt_flag = pte_enter_flags_to_pbmt(flags);

	while (sva < eva) {
		const size_t sidx = (sva >> vshift) & pdetab_mask;

		l1_pte[sidx] = PA_TO_PTE(spa) | PTE_KERN | PTE_HARDWIRED |
		    PTE_RW | pbmt_flag;
		spa += NBSEG;
		sva += NBSEG;
	}

	return 0;
}