/* $NetBSD: cpufunc.c,v 1.36 2024/02/07 04:20:26 msaitoh Exp $ */

/*
 * Copyright (c) 2017 Ryo Shimizu
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_cpuoptions.h"
#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.36 2024/02/07 04:20:26 msaitoh Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include <arm/cpufunc.h>

u_int cputype;			/* compat arm */
u_int arm_dcache_align;		/* compat arm */
u_int arm_dcache_align_mask;	/* compat arm */
u_int arm_dcache_maxline;

u_int aarch64_cache_vindexsize;
u_int aarch64_cache_prefer_mask;

int aarch64_hafdbs_enabled __read_mostly;
int aarch64_pan_enabled __read_mostly;
int aarch64_pac_enabled __read_mostly;

static void __noasan
extract_cacheunit(int level, bool insn, int cachetype,
    struct aarch64_cache_info *cacheinfo)
{
	struct aarch64_cache_unit *cunit;
	uint64_t ccsidr, mmfr2;

	/* select and extract the level N instruction or data cache */
	reg_csselr_el1_write(__SHIFTIN(level, CSSELR_LEVEL) |
	    __SHIFTIN(insn ? 1 : 0, CSSELR_IND));
	isb();

	ccsidr = reg_ccsidr_el1_read();
	mmfr2 = reg_id_aa64mmfr2_el1_read();

	if (insn)
		cunit = &cacheinfo[level].icache;
	else
		cunit = &cacheinfo[level].dcache;

	cunit->cache_type = cachetype;

	switch (__SHIFTOUT(mmfr2, ID_AA64MMFR2_EL1_CCIDX)) {
	case ID_AA64MMFR2_EL1_CCIDX_32BIT:
		cunit->cache_line_size =
		    1 << (__SHIFTOUT(ccsidr, CCSIDR_LINESIZE) + 4);
		cunit->cache_ways = __SHIFTOUT(ccsidr, CCSIDR_ASSOC) + 1;
		cunit->cache_sets = __SHIFTOUT(ccsidr, CCSIDR_NUMSET) + 1;
		break;
	case ID_AA64MMFR2_EL1_CCIDX_64BIT:
		cunit->cache_line_size =
		    1 << (__SHIFTOUT(ccsidr, CCSIDR64_LINESIZE) + 4);
		cunit->cache_ways = __SHIFTOUT(ccsidr, CCSIDR64_ASSOC) + 1;
		cunit->cache_sets = __SHIFTOUT(ccsidr, CCSIDR64_NUMSET) + 1;
		break;
	}

	/* calculate way size and whole size */
	cunit->cache_way_size = cunit->cache_line_size * cunit->cache_sets;
	cunit->cache_size = cunit->cache_way_size * cunit->cache_ways;
}
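
/*
 * Worked example of the CCSIDR decoding above (illustrative values, not
 * taken from any particular core): a 32KB, 4-way data cache with 64-byte
 * lines would typically report LINESIZE=2, ASSOC=3 and NUMSET=127, giving
 *
 *	cache_line_size = 1 << (2 + 4)  = 64
 *	cache_ways      = 3 + 1         = 4
 *	cache_sets      = 127 + 1       = 128
 *	cache_way_size  = 64 * 128      = 8KB
 *	cache_size      = 8KB * 4       = 32KB
 */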

/* Must be called on each processor */
void __noasan
aarch64_getcacheinfo(struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	uint32_t clidr, ctr;
	int level, cachetype;

	/*
	 * CTR - Cache Type Register
	 */
	ctr = reg_ctr_el0_read();
	switch (__SHIFTOUT(ctr, CTR_EL0_L1IP_MASK)) {
	case CTR_EL0_L1IP_VPIPT:
		cachetype = CACHE_TYPE_VPIPT;
		break;
	case CTR_EL0_L1IP_AIVIVT:
		cachetype = CACHE_TYPE_VIVT;
		break;
	case CTR_EL0_L1IP_VIPT:
		cachetype = CACHE_TYPE_VIPT;
		break;
	case CTR_EL0_L1IP_PIPT:
		cachetype = CACHE_TYPE_PIPT;
		break;
	}

	/*
	 * CLIDR  - Cache Level ID Register
	 * CSSELR - Cache Size Selection Register
	 * CCSIDR - Current Cache Size ID Register (selected by CSSELR)
	 */

	/* L1, L2, L3, ..., L8 cache */
	for (level = 0, clidr = reg_clidr_el1_read();
	    level < MAX_CACHE_LEVEL; level++, clidr >>= 3) {

		int cacheable;

		switch (clidr & 7) {
		case CLIDR_TYPE_NOCACHE:
			cacheable = CACHE_CACHEABLE_NONE;
			break;
		case CLIDR_TYPE_ICACHE:
			cacheable = CACHE_CACHEABLE_ICACHE;
			extract_cacheunit(level, true, cachetype, cinfo);
			break;
		case CLIDR_TYPE_DCACHE:
			cacheable = CACHE_CACHEABLE_DCACHE;
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		case CLIDR_TYPE_IDCACHE:
			cacheable = CACHE_CACHEABLE_IDCACHE;
			extract_cacheunit(level, true, cachetype, cinfo);
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		case CLIDR_TYPE_UNIFIEDCACHE:
			cacheable = CACHE_CACHEABLE_UNIFIED;
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		default:
			cacheable = CACHE_CACHEABLE_NONE;
			break;
		}

		cinfo[level].cacheable = cacheable;
		if (cacheable == CACHE_CACHEABLE_NONE) {
			/* no more levels */
			break;
		}

		/*
		 * The L1 instruction cache type comes from CTR_EL0.L1IP;
		 * all other levels are treated as PIPT.
		 */
		cachetype = CACHE_TYPE_PIPT;
	}
}
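
/*
 * aarch64_parsecacheinfo() below derives the L1 icache virtual index size:
 * for a VIVT/VIPT icache the virtual index spans one way
 * (cache_size / cache_ways), and when that exceeds PAGE_SIZE virtual
 * aliases become possible, so the page allocator is asked to colour pages
 * accordingly.  As an illustrative example, a 32KB 2-way VIPT icache has
 * a 16KB virtual index; with 4KB pages that yields 16KB / 4KB = 4 page
 * colours passed to uvm_page_recolor().
 */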

void
aarch64_parsecacheinfo(struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
	const uint32_t ctr = id->ac_ctr;
	u_int vindexsize;

	/* remember maximum alignment */
	if (arm_dcache_maxline < __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE)) {
		arm_dcache_maxline = __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE);
		arm_dcache_align = sizeof(int) << arm_dcache_maxline;
		arm_dcache_align_mask = arm_dcache_align - 1;
	}

#ifdef MULTIPROCESSOR
	if (coherency_unit < arm_dcache_align)
		panic("coherency_unit %ld < %d; increase COHERENCY_UNIT",
		    coherency_unit, arm_dcache_align);
#endif

	/* calculate L1 icache virtual index size */
	if ((cinfo[0].icache.cache_type == CACHE_TYPE_VIVT ||
	    cinfo[0].icache.cache_type == CACHE_TYPE_VIPT) &&
	    (cinfo[0].cacheable == CACHE_CACHEABLE_ICACHE ||
	    cinfo[0].cacheable == CACHE_CACHEABLE_IDCACHE)) {

		vindexsize =
		    cinfo[0].icache.cache_size /
		    cinfo[0].icache.cache_ways;

		KASSERT(vindexsize != 0);
	} else {
		vindexsize = 0;
	}

	if (vindexsize > aarch64_cache_vindexsize) {
		aarch64_cache_vindexsize = vindexsize;
		aarch64_cache_prefer_mask = vindexsize - 1;

		if (uvm.page_init_done)
			uvm_page_recolor(vindexsize / PAGE_SIZE);
	}
}

static int
prt_cache(device_t self, struct aarch64_cache_info *cinfo, int level)
{
	struct aarch64_cache_unit *cunit;
	int i;
	const char *cacheable, *cachetype;

	if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
		return -1;

	for (i = 0; i < 2; i++) {
		switch (cinfo[level].cacheable) {
		case CACHE_CACHEABLE_ICACHE:
			cunit = &cinfo[level].icache;
			cacheable = "Instruction";
			break;
		case CACHE_CACHEABLE_DCACHE:
			cunit = &cinfo[level].dcache;
			cacheable = "Data";
			break;
		case CACHE_CACHEABLE_IDCACHE:
			if (i == 0) {
				cunit = &cinfo[level].icache;
				cacheable = "Instruction";
			} else {
				cunit = &cinfo[level].dcache;
				cacheable = "Data";
			}
			break;
		case CACHE_CACHEABLE_UNIFIED:
			cunit = &cinfo[level].dcache;
			cacheable = "Unified";
			break;
		default:
			cunit = &cinfo[level].dcache;
			cacheable = "*UNK*";
			break;
		}

		switch (cunit->cache_type) {
		case CACHE_TYPE_VPIPT:
			cachetype = "VPIPT";
			break;
		case CACHE_TYPE_VIVT:
			cachetype = "VIVT";
			break;
		case CACHE_TYPE_VIPT:
			cachetype = "VIPT";
			break;
		case CACHE_TYPE_PIPT:
			cachetype = "PIPT";
			break;
		default:
			cachetype = "*UNK*";
			break;
		}

		aprint_verbose_dev(self,
		    "L%d %uKB/%uB %u-way (%u set) %s %s cache\n",
		    level + 1,
		    cunit->cache_size / 1024,
		    cunit->cache_line_size,
		    cunit->cache_ways,
		    cunit->cache_sets,
		    cachetype, cacheable);

		if (cinfo[level].cacheable != CACHE_CACHEABLE_IDCACHE)
			break;
	}

	return 0;
}

void
aarch64_printcacheinfo(device_t dev, struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++)
		if (prt_cache(dev, cinfo, level) < 0)
			break;
}
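
/*
 * The ln_dcache_*_all() helpers below walk one cache level by set/way.
 * The DC CSW/ISW/CISW operand packs the way number into the top bits
 * (starting at bit 32 - log2(ways)), the set number at the line-size
 * shift, and the level into bits [3:1].  For instance, with 64-byte
 * lines and 4 ways, setshift is 6 and wayshift is 30, so way 3, set 5
 * of L1 (level 0) is encoded as (3 << 30) | (5 << 6) | (0 << 1).
 */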

static inline void
ln_dcache_wb_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc csw, %0; dsb sy" :: "r"(x));
		}
	}
}

static inline void
ln_dcache_wbinv_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc cisw, %0; dsb sy" :: "r"(x));
		}
	}
}

static inline void
ln_dcache_inv_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc isw, %0; dsb sy" :: "r"(x));
		}
	}
}

void
aarch64_dcache_wbinv_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_wbinv_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

void
aarch64_dcache_inv_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_inv_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

void
aarch64_dcache_wb_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_wb_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

int
set_cpufuncs(void)
{
	/* This is only called from the boot processor (BP). */

	return aarch64_setcpufuncs(&cpu_info_store[0]);
}


int
aarch64_setcpufuncs(struct cpu_info *ci)
{
	const uint64_t ctr = reg_ctr_el0_read();
	const uint64_t clidr = reg_clidr_el1_read();

	/* install default functions */
	ci->ci_cpufuncs.cf_set_ttbr0 = aarch64_set_ttbr0;
	ci->ci_cpufuncs.cf_icache_sync_range = aarch64_icache_sync_range;

	/*
	 * install core/cluster specific functions
	 */

	/* Icache sync op */
	if (__SHIFTOUT(ctr, CTR_EL0_DIC) == 1) {
		/* Icache invalidation to the PoU is not required */
		ci->ci_cpufuncs.cf_icache_sync_range =
		    aarch64_icache_barrier_range;
	} else if (__SHIFTOUT(ctr, CTR_EL0_IDC) == 1 ||
	    __SHIFTOUT(clidr, CLIDR_LOC) == 0 ||
	    (__SHIFTOUT(clidr, CLIDR_LOUIS) == 0 &&
	     __SHIFTOUT(clidr, CLIDR_LOUU) == 0)) {
		/* Dcache clean to the PoU is not required for Icache */
		ci->ci_cpufuncs.cf_icache_sync_range =
		    aarch64_icache_inv_range;
	}

#ifdef CPU_THUNDERX
	const uint32_t midr = reg_midr_el1_read();

	/* Cavium erratum 27456 */
	if ((midr == CPU_ID_THUNDERXP1d0) ||
	    (midr == CPU_ID_THUNDERXP1d1) ||
	    (midr == CPU_ID_THUNDERXP2d1) ||
	    (midr == CPU_ID_THUNDERX81XXRX)) {
		ci->ci_cpufuncs.cf_set_ttbr0 = aarch64_set_ttbr0_thunderx;
	}
#endif

	return 0;
}
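
/*
 * ARMv8.1 HAFDBS lets the MMU maintain the page-table Access flag (and,
 * with TCR_EL1.HD, the dirty state) in hardware.  For example, with
 * TCR_EL1.HA enabled the first load from a freshly mapped page silently
 * sets AF in the PTE; without it the same load would fault so that
 * pmap_fault_fixup() can mark the page referenced in software.
 */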

void
aarch64_hafdbs_init(int primary)
{
#ifdef ARMV81_HAFDBS
	uint64_t tcr;
	int hafdbs;

	hafdbs = __SHIFTOUT(reg_id_aa64mmfr1_el1_read(),
	    ID_AA64MMFR1_EL1_HAFDBS);

	/*
	 * hafdbs
	 *  0: HAFDBS_NONE - no support for any hardware flags
	 *  1: HAFDBS_A    - only the hardware access flag is supported
	 *  2: HAFDBS_AD   - hardware access and modified flags are supported
	 */

	if (primary) {
		/* CPU0 does the detection. */
		switch (hafdbs) {
		case ID_AA64MMFR1_EL1_HAFDBS_NONE:
		default:
			aarch64_hafdbs_enabled = 0;
			break;
		case ID_AA64MMFR1_EL1_HAFDBS_A:
		case ID_AA64MMFR1_EL1_HAFDBS_AD:
			aarch64_hafdbs_enabled = hafdbs;
			break;
		}
	} else {
		/*
		 * The HAFDBS support reported by this secondary
		 * (application) processor may differ from what the
		 * primary CPU detected.
		 *
		 * XXX:
		 * The correct way to handle this is to disable it on all
		 * cores, or to call pmap_fault_fixup() only on the
		 * unsupported cores, but for now just panic().
		 */
		if (aarch64_hafdbs_enabled != hafdbs)
			panic("HAFDBS is supported (%d) on primary cpu, "
			    "but isn't equal (%d) on secondary cpu",
			    aarch64_hafdbs_enabled, hafdbs);
	}

	/* enable hardware updates to the Access flag and Dirty state */
	tcr = reg_tcr_el1_read();
	switch (hafdbs) {
	case ID_AA64MMFR1_EL1_HAFDBS_NONE:
	default:
		break;
	case ID_AA64MMFR1_EL1_HAFDBS_A:
		/* enable access flag only */
		reg_tcr_el1_write(tcr | TCR_HA);
		isb();
		break;
	case ID_AA64MMFR1_EL1_HAFDBS_AD:
		/* enable both access and dirty */
		reg_tcr_el1_write(tcr | TCR_HD | TCR_HA);
		isb();
		break;
	}
#endif
}
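
/*
 * ARMv8.1 PAN (Privileged Access Never) makes EL1 loads and stores to
 * EL0-accessible mappings fault while PSTATE.PAN is set, so the kernel
 * has to reach user memory deliberately (for example by temporarily
 * clearing PAN or by using unprivileged load/store instructions).
 * Clearing SCTLR_EL1.SPAN below makes the hardware set PSTATE.PAN on
 * every exception taken to EL1, re-arming the protection on each
 * kernel entry.
 */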

void
aarch64_pan_init(int primary)
{
#ifdef ARMV81_PAN
	uint64_t reg, sctlr;

	/* CPU0 does the detection. */
	if (primary) {
		reg = reg_id_aa64mmfr1_el1_read();
		if (__SHIFTOUT(reg, ID_AA64MMFR1_EL1_PAN) !=
		    ID_AA64MMFR1_EL1_PAN_NONE)
			aarch64_pan_enabled = 1;
	}

	if (!aarch64_pan_enabled)
		return;

	/*
	 * On an exception to EL1, have the CPU set the PAN bit automatically.
	 * This ensures PAN is enabled each time the kernel is entered.
	 */
	sctlr = reg_sctlr_el1_read();
	sctlr &= ~SCTLR_SPAN;
	reg_sctlr_el1_write(sctlr);

	/* Set the PAN bit right now. */
	reg_pan_write(1);
#endif
}

/*
 * In order to avoid inconsistencies with pointer authentication
 * in this function itself, the caller must enable PAC according
 * to the return value.
 */
int
aarch64_pac_init(int primary)
{
#ifdef ARMV83_PAC
	uint64_t reg;

	/* CPU0 does the detection. */
	if (primary) {
		reg = reg_id_aa64isar1_el1_read();
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_APA) !=
		    ID_AA64ISAR1_EL1_APA_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_API) !=
		    ID_AA64ISAR1_EL1_API_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_GPA) !=
		    ID_AA64ISAR1_EL1_GPA_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_GPI) !=
		    ID_AA64ISAR1_EL1_GPI_NONE)
			aarch64_pac_enabled = 1;
	}

	if (!aarch64_pac_enabled)
		return -1;

	/* Set the key. Curlwp here is the CPU's idlelwp. */
	reg_APIAKeyLo_EL1_write(curlwp->l_md.md_ia_kern[0]);
	reg_APIAKeyHi_EL1_write(curlwp->l_md.md_ia_kern[1]);

	return 0;
#else
	return -1;
#endif
}
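
/*
 * A minimal sketch of how a caller might act on the return value of
 * aarch64_pac_init().  The actual enabling is left to the caller, as the
 * comment above the function notes, and the SCTLR_EnIA macro name used
 * here is an assumption for illustration (the ARMv8.3 bit itself is
 * SCTLR_EL1.EnIA):
 *
 *	if (aarch64_pac_init(true) == 0) {
 *		reg_sctlr_el1_write(reg_sctlr_el1_read() | SCTLR_EnIA);
 *		isb();
 *	}
 */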