/*	$NetBSD: cpufunc.c,v 1.37 2026/02/03 08:47:05 skrll Exp $	*/

/*
 * Copyright (c) 2017 Ryo Shimizu
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_cpuoptions.h"
#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.37 2026/02/03 08:47:05 skrll Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include <arm/cpufunc.h>

u_int cputype;			/* compat arm */
u_int arm_dcache_align;		/* compat arm */
u_int arm_dcache_align_mask;	/* compat arm */
u_int arm_dcache_maxline;

u_int aarch64_cache_vindexsize;
u_int aarch64_cache_prefer_mask;

int aarch64_hafdbs_enabled __read_mostly;
int aarch64_pan_enabled __read_mostly;
int aarch64_pac_enabled __read_mostly;

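/*
 * Read the geometry of one cache level from CCSIDR_EL1 (selected via
 * CSSELR_EL1) and record the line size, number of ways and number of
 * sets in the given aarch64_cache_info slot.
 */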
static void __noasan
extract_cacheunit(int level, bool insn, int cachetype,
    struct aarch64_cache_info *cacheinfo)
{
	struct aarch64_cache_unit *cunit;
	uint64_t ccsidr, mmfr2;

	/* select and extract the level N instruction or data cache */
	reg_csselr_el1_write(__SHIFTIN(level, CSSELR_LEVEL) |
	    __SHIFTIN(insn ? 1 : 0, CSSELR_IND));
	isb();

	ccsidr = reg_ccsidr_el1_read();
	mmfr2 = reg_id_aa64mmfr2_el1_read();

	if (insn)
		cunit = &cacheinfo[level].icache;
	else
		cunit = &cacheinfo[level].dcache;

	cunit->cache_type = cachetype;

	switch (__SHIFTOUT(mmfr2, ID_AA64MMFR2_EL1_CCIDX)) {
	case ID_AA64MMFR2_EL1_CCIDX_32BIT:
		cunit->cache_line_size =
		    1 << (__SHIFTOUT(ccsidr, CCSIDR_LINESIZE) + 4);
		cunit->cache_ways = __SHIFTOUT(ccsidr, CCSIDR_ASSOC) + 1;
		cunit->cache_sets = __SHIFTOUT(ccsidr, CCSIDR_NUMSET) + 1;
		break;
	case ID_AA64MMFR2_EL1_CCIDX_64BIT:
		cunit->cache_line_size =
		    1 << (__SHIFTOUT(ccsidr, CCSIDR64_LINESIZE) + 4);
		cunit->cache_ways = __SHIFTOUT(ccsidr, CCSIDR64_ASSOC) + 1;
		cunit->cache_sets = __SHIFTOUT(ccsidr, CCSIDR64_NUMSET) + 1;
		break;
	}

	/* calculate the way size and the total size */
	cunit->cache_way_size = cunit->cache_line_size * cunit->cache_sets;
	cunit->cache_size = cunit->cache_way_size * cunit->cache_ways;
}

/* Must be called on each processor */
void __noasan
aarch64_getcacheinfo(struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	uint32_t clidr, ctr;
	int level, cachetype;

	/*
	 * CTR - Cache Type Register
	 */
	ctr = reg_ctr_el0_read();
	switch (__SHIFTOUT(ctr, CTR_EL0_L1IP_MASK)) {
	case CTR_EL0_L1IP_VPIPT:
		cachetype = CACHE_TYPE_VPIPT;
		break;
	case CTR_EL0_L1IP_AIVIVT:
		cachetype = CACHE_TYPE_VIVT;
		break;
	case CTR_EL0_L1IP_VIPT:
		cachetype = CACHE_TYPE_VIPT;
		break;
	case CTR_EL0_L1IP_PIPT:
		cachetype = CACHE_TYPE_PIPT;
		break;
	}

	/*
	 * CLIDR - Cache Level ID Register
	 * CSSELR - Cache Size Selection Register
	 * CCSIDR - Current Cache Size ID Register (selected by CSSELR)
	 */

	/* L1, L2, L3, ..., L8 cache */
	for (level = 0, clidr = reg_clidr_el1_read();
	    level < MAX_CACHE_LEVEL; level++, clidr >>= 3) {

		int cacheable;

		switch (clidr & 7) {
		case CLIDR_TYPE_NOCACHE:
			cacheable = CACHE_CACHEABLE_NONE;
			break;
		case CLIDR_TYPE_ICACHE:
			cacheable = CACHE_CACHEABLE_ICACHE;
			extract_cacheunit(level, true, cachetype, cinfo);
			break;
		case CLIDR_TYPE_DCACHE:
			cacheable = CACHE_CACHEABLE_DCACHE;
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		case CLIDR_TYPE_IDCACHE:
			cacheable = CACHE_CACHEABLE_IDCACHE;
			extract_cacheunit(level, true, cachetype, cinfo);
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		case CLIDR_TYPE_UNIFIEDCACHE:
			cacheable = CACHE_CACHEABLE_UNIFIED;
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		default:
			cacheable = CACHE_CACHEABLE_NONE;
			break;
		}

		cinfo[level].cacheable = cacheable;
		if (cacheable == CACHE_CACHEABLE_NONE) {
			/* no more levels */
			break;
		}

		/*
		 * The L1 Icache type comes from CTR_EL0.L1Ip;
		 * all other levels are treated as PIPT.
		 */
		cachetype = CACHE_TYPE_PIPT;
	}
}

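/*
 * Derive the global cache parameters from the per-CPU cache info:
 * update the compat-arm Dcache alignment globals from CTR_EL0.DminLine
 * and compute the L1 Icache virtual index size used for page colouring.
 */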
void
aarch64_parsecacheinfo(struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
	const uint32_t ctr = id->ac_ctr;
	u_int vindexsize;

	/* remember maximum alignment */
	if (arm_dcache_maxline < __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE)) {
		arm_dcache_maxline = __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE);
		arm_dcache_align = sizeof(int) << arm_dcache_maxline;
		arm_dcache_align_mask = arm_dcache_align - 1;
	}

#ifdef MULTIPROCESSOR
	if (coherency_unit < arm_dcache_align)
		panic("coherency_unit %ld < %d; increase COHERENCY_UNIT",
		    coherency_unit, arm_dcache_align);
#endif

	/* calculate L1 icache virtual index size */
	if ((cinfo[0].icache.cache_type == CACHE_TYPE_VIVT ||
	     cinfo[0].icache.cache_type == CACHE_TYPE_VIPT) &&
	    (cinfo[0].cacheable == CACHE_CACHEABLE_ICACHE ||
	     cinfo[0].cacheable == CACHE_CACHEABLE_IDCACHE)) {

		vindexsize =
		    cinfo[0].icache.cache_size /
		    cinfo[0].icache.cache_ways;

		KASSERT(vindexsize != 0);
	} else {
		vindexsize = 0;
	}

	if (vindexsize > aarch64_cache_vindexsize) {
		aarch64_cache_vindexsize = vindexsize;
		aarch64_cache_prefer_mask = vindexsize - 1;

		if (uvm.page_init_done)
			uvm_page_recolor(vindexsize / PAGE_SIZE);
	}
}

static int
prt_cache(device_t self, struct aarch64_cache_info *cinfo, int level)
{
	struct aarch64_cache_unit *cunit;
	int i;
	const char *cacheable, *cachetype;

	if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
		return -1;

	for (i = 0; i < 2; i++) {
		switch (cinfo[level].cacheable) {
		case CACHE_CACHEABLE_ICACHE:
			cunit = &cinfo[level].icache;
			cacheable = "Instruction";
			break;
		case CACHE_CACHEABLE_DCACHE:
			cunit = &cinfo[level].dcache;
			cacheable = "Data";
			break;
		case CACHE_CACHEABLE_IDCACHE:
			if (i == 0) {
				cunit = &cinfo[level].icache;
				cacheable = "Instruction";
			} else {
				cunit = &cinfo[level].dcache;
				cacheable = "Data";
			}
			break;
		case CACHE_CACHEABLE_UNIFIED:
			cunit = &cinfo[level].dcache;
			cacheable = "Unified";
			break;
		default:
			cunit = &cinfo[level].dcache;
			cacheable = "*UNK*";
			break;
		}

		switch (cunit->cache_type) {
		case CACHE_TYPE_VPIPT:
			cachetype = "VPIPT";
			break;
		case CACHE_TYPE_VIVT:
			cachetype = "VIVT";
			break;
		case CACHE_TYPE_VIPT:
			cachetype = "VIPT";
			break;
		case CACHE_TYPE_PIPT:
			cachetype = "PIPT";
			break;
		default:
			cachetype = "*UNK*";
			break;
		}

		aprint_verbose_dev(self,
		    "L%d %uKB/%uB %u-way (%u set) %s %s cache\n",
		    level + 1,
		    cunit->cache_size / 1024,
		    cunit->cache_line_size,
		    cunit->cache_ways,
		    cunit->cache_sets,
		    cachetype, cacheable);

		if (cinfo[level].cacheable != CACHE_CACHEABLE_IDCACHE)
			break;
	}

	return 0;
}

void
aarch64_printcacheinfo(device_t dev, struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++)
		if (prt_cache(dev, cinfo, level) < 0)
			break;
}

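/*
 * The DC CSW/CISW/ISW instructions used below take a Set/Way operand:
 * the cache level goes in bits [3:1], the set index is shifted left by
 * log2(line size) and the way index is left-justified in the top bits,
 * i.e. shifted by 32 - log2(ways).  For example, a 64-byte line, 4-way
 * cache gives setshift = 6 and wayshift = 30.  The ffs()-based
 * calculation assumes the line size and associativity are powers of two.
 */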
static inline void
ln_dcache_wb_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc csw, %0; dsb sy" :: "r"(x));
		}
	}
}

static inline void
ln_dcache_wbinv_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc cisw, %0; dsb sy" :: "r"(x));
		}
	}
}

static inline void
ln_dcache_inv_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc isw, %0; dsb sy" :: "r"(x));
		}
	}
}

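/*
 * Whole-cache maintenance by set/way only operates on the caches of the
 * CPU executing it, so these routines must run with preemption disabled
 * and each CPU has to be handled separately.
 */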
void
aarch64_dcache_wbinv_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_wbinv_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

void
aarch64_dcache_inv_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_inv_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

void
aarch64_dcache_wb_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_wb_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

bool
aarch64_earlydevice_va_p(void)
{
	/* This function may be called before the MMU is enabled or KVA is mapped */
	if ((reg_sctlr_el1_read() & SCTLR_M) == 0)
		return false;

	/* device mappings become available after pmap_devmap_bootstrap() */
	if (!pmap_devmap_bootstrapped_p())
		return false;

	return true;
}

int
set_cpufuncs(void)
{
	// This is only called from the boot processor (BP)

	return aarch64_setcpufuncs(&cpu_info_store[0]);
}

int
aarch64_setcpufuncs(struct cpu_info *ci)
{
	const uint64_t ctr = reg_ctr_el0_read();
	const uint64_t clidr = reg_clidr_el1_read();

	/* install default functions */
	ci->ci_cpufuncs.cf_set_ttbr0 = aarch64_set_ttbr0;
	ci->ci_cpufuncs.cf_icache_sync_range = aarch64_icache_sync_range;

	/*
	 * install core/cluster specific functions
	 */

	/* Icache sync op */
	if (__SHIFTOUT(ctr, CTR_EL0_DIC) == 1) {
		/* Icache invalidation to the PoU is not required */
		ci->ci_cpufuncs.cf_icache_sync_range =
		    aarch64_icache_barrier_range;
	} else if (__SHIFTOUT(ctr, CTR_EL0_IDC) == 1 ||
	    __SHIFTOUT(clidr, CLIDR_LOC) == 0 ||
	    (__SHIFTOUT(clidr, CLIDR_LOUIS) == 0 &&
	     __SHIFTOUT(clidr, CLIDR_LOUU) == 0)) {
		/* Dcache clean to the PoU is not required for the Icache */
		ci->ci_cpufuncs.cf_icache_sync_range =
		    aarch64_icache_inv_range;
	}

#ifdef CPU_THUNDERX
	const uint32_t midr = reg_midr_el1_read();

	/* Cavium erratum 27456 */
	if ((midr == CPU_ID_THUNDERXP1d0) ||
	    (midr == CPU_ID_THUNDERXP1d1) ||
	    (midr == CPU_ID_THUNDERXP2d1) ||
	    (midr == CPU_ID_THUNDERX81XXRX)) {
		ci->ci_cpufuncs.cf_set_ttbr0 = aarch64_set_ttbr0_thunderx;
	}
#endif

	return 0;
}

void
aarch64_hafdbs_init(int primary)
{
#ifdef ARMV81_HAFDBS
	uint64_t tcr;
	int hafdbs;

	hafdbs = __SHIFTOUT(reg_id_aa64mmfr1_el1_read(),
	    ID_AA64MMFR1_EL1_HAFDBS);

	/*
	 * hafdbs
	 *  0: HAFDBS_NONE - no support for any hardware flags
	 *  1: HAFDBS_A    - only the hardware access flag is supported
	 *  2: HAFDBS_AD   - hardware access and dirty flags are supported
	 */

	if (primary) {
		/* CPU0 does the detection. */
		switch (hafdbs) {
		case ID_AA64MMFR1_EL1_HAFDBS_NONE:
		default:
			aarch64_hafdbs_enabled = 0;
			break;
		case ID_AA64MMFR1_EL1_HAFDBS_A:
		case ID_AA64MMFR1_EL1_HAFDBS_AD:
			aarch64_hafdbs_enabled = hafdbs;
			break;
		}
	} else {
		/*
		 * The HAFDBS support status of this secondary processor may
		 * differ from that of the primary CPU.
		 *
		 * XXX:
		 * The correct way to handle this is to disable HAFDBS on all
		 * cores, or to call pmap_fault_fixup() only on the
		 * unsupported cores, but for now just panic().
		 */
		if (aarch64_hafdbs_enabled != hafdbs)
			panic("HAFDBS is supported (%d) on primary cpu, "
			    "but isn't equal (%d) on secondary cpu",
			    aarch64_hafdbs_enabled, hafdbs);
	}

	/* enable hardware updates to the Access flag and Dirty state */
	tcr = reg_tcr_el1_read();
	switch (hafdbs) {
	case ID_AA64MMFR1_EL1_HAFDBS_NONE:
	default:
		break;
	case ID_AA64MMFR1_EL1_HAFDBS_A:
		/* enable only access */
		reg_tcr_el1_write(tcr | TCR_HA);
		isb();
		break;
	case ID_AA64MMFR1_EL1_HAFDBS_AD:
		/* enable both access and dirty */
		reg_tcr_el1_write(tcr | TCR_HD | TCR_HA);
		isb();
		break;
	}
#endif
}

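/*
 * Privileged Access Never (FEAT_PAN): with PSTATE.PAN set, kernel
 * accesses to user-accessible memory fault, catching unintended
 * direct dereferences of user addresses.
 */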
void
aarch64_pan_init(int primary)
{
#ifdef ARMV81_PAN
	uint64_t reg, sctlr;

	/* CPU0 does the detection. */
	if (primary) {
		reg = reg_id_aa64mmfr1_el1_read();
		if (__SHIFTOUT(reg, ID_AA64MMFR1_EL1_PAN) !=
		    ID_AA64MMFR1_EL1_PAN_NONE)
			aarch64_pan_enabled = 1;
	}

	if (!aarch64_pan_enabled)
		return;

	/*
	 * On an exception to EL1, have the CPU set the PAN bit automatically.
	 * This ensures PAN is enabled each time the kernel is entered.
	 */
	sctlr = reg_sctlr_el1_read();
	sctlr &= ~SCTLR_SPAN;
	reg_sctlr_el1_write(sctlr);

	/* Set the PAN bit right now. */
	reg_pan_write(1);
#endif
}

/*
 * To avoid inconsistencies with pointer authentication in this function
 * itself, the caller must enable PAC according to the return value.
 */
int
aarch64_pac_init(int primary)
{
#ifdef ARMV83_PAC
	uint64_t reg;

	/* CPU0 does the detection. */
	if (primary) {
		reg = reg_id_aa64isar1_el1_read();
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_APA) !=
		    ID_AA64ISAR1_EL1_APA_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_API) !=
		    ID_AA64ISAR1_EL1_API_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_GPA) !=
		    ID_AA64ISAR1_EL1_GPA_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_GPI) !=
		    ID_AA64ISAR1_EL1_GPI_NONE)
			aarch64_pac_enabled = 1;
	}

	if (!aarch64_pac_enabled)
		return -1;

	/* Set the key.  curlwp here is the CPU's idle lwp. */
	reg_APIAKeyLo_EL1_write(curlwp->l_md.md_ia_kern[0]);
	reg_APIAKeyHi_EL1_write(curlwp->l_md.md_ia_kern[1]);

	return 0;
#else
	return -1;
#endif
}