/*	$NetBSD: cache.c,v 1.102 2021/01/24 07:36:54 mrg Exp $ */

/*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Aaron Brown and
 *	Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)cache.c	8.2 (Berkeley) 10/30/93
 *
 */

/*
 * Cache routines.
 *
 * TODO:
 *	- rework range flush
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache.c,v 1.102 2021/01/24 07:36:54 mrg Exp $");

#include "opt_multiprocessor.h"
#include "opt_sparc_arch.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <uvm/uvm_extern.h>

#include <machine/ctlreg.h>
#include <machine/pte.h>
#include <machine/locore.h>

#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>
#include <sparc/sparc/cpuvar.h>

struct evcnt vcache_flush_pg =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","pg");
EVCNT_ATTACH_STATIC(vcache_flush_pg);
struct evcnt vcache_flush_seg =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","seg");
EVCNT_ATTACH_STATIC(vcache_flush_seg);
struct evcnt vcache_flush_reg =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","reg");
EVCNT_ATTACH_STATIC(vcache_flush_reg);
struct evcnt vcache_flush_ctx =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","ctx");
EVCNT_ATTACH_STATIC(vcache_flush_ctx);
struct evcnt vcache_flush_range =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","rng");
EVCNT_ATTACH_STATIC(vcache_flush_range);

int cache_alias_dist;		/* Cache anti-aliasing constants */
int cache_alias_bits;
u_long dvma_cachealign;

/*
 * Enable the cache.
 * We need to clear out the valid bits first.
 */
void
sun4_cache_enable(void)
{
	u_int i, lim, ls, ts;

	cache_alias_bits = CPU_ISSUN4
				? CACHE_ALIAS_BITS_SUN4
				: CACHE_ALIAS_BITS_SUN4C;
	cache_alias_dist = CPU_ISSUN4
				? CACHE_ALIAS_DIST_SUN4
				: CACHE_ALIAS_DIST_SUN4C;

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;

	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
		sta(i, ASI_CONTROL, 0);

	stba(AC_SYSENABLE, ASI_CONTROL,
	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
	CACHEINFO.c_enabled = 1;

#ifdef notyet
	if (cpuinfo.flags & SUN4_IOCACHE) {
		stba(AC_SYSENABLE, ASI_CONTROL,
		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
		printf("iocache enabled\n");
	}
#endif
}

/*
 * XXX Hammer is a bit too big, here; SUN4D systems only have Viking.
 */
#if defined(SUN4M) || defined(SUN4D)
void
ms1_cache_enable(void)
{
	u_int pcr;

	cache_alias_dist = uimax(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* We "flash-clear" the I/D caches. */
	if ((pcr & MS1_PCR_ICE) == 0)
		sta(0, ASI_ICACHECLR, 0);
	if ((pcr & MS1_PCR_DCE) == 0)
		sta(0, ASI_DCACHECLR, 0);

	/* Turn on caches */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/*
	 * When zeroing or copying pages, there might still be entries in
	 * the cache, since we don't flush pages from the cache when
	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
	 * MS1 cache is write-through and not write-allocate, so we can
	 * use cacheable access while not displacing cache lines.
	 */
	CACHEINFO.c_flags |= CACHE_MANDATORY;
}
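/*
 * Illustrative sketch (hypothetical helper, not used by the kernel):
 * given the anti-aliasing constants computed above, two virtual
 * mappings of the same physical page can share a virtually indexed
 * cache safely only when they agree in the bits selected by
 * cache_alias_bits, so a pmap-style conflict test reduces to an XOR
 * and a mask.
 */
#ifdef notdef
static inline int
cache_bad_alias(u_long va1, u_long va2)
{

	/* Non-zero if the two addresses would index different lines */
	return (((va1 ^ va2) & cache_alias_bits) != 0);
}
#endif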
void
viking_cache_enable(void)
{
	u_int pcr;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	if ((pcr & VIKING_PCR_ICE) == 0) {
		/* I-cache not on; "flash-clear" it now. */
		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
		sta(0, ASI_ICACHECLR, 0);		/* clear */
	}
	if ((pcr & VIKING_PCR_DCE) == 0) {
		/* D-cache not on: "flash-clear" it. */
		sta(0x80000000, ASI_DCACHECLR, 0);
		sta(0, ASI_DCACHECLR, 0);
	}

	/* Turn on caches via MMU */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/* Now turn on MultiCache if it exists */
	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
		/* Set external cache enable bit in MXCC control register */
		stda(MXCC_CTRLREG, ASI_CONTROL,
		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
		CACHEINFO.c_flags |= CACHE_PAGETABLES; /* Ok to cache PTEs */
		CACHEINFO.ec_enabled = 1;
	}
}

void
hypersparc_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr, v;
	int alias_dist;

	/*
	 * Setup the anti-aliasing constants and DVMA alignment constraint.
	 */
	alias_dist = CACHEINFO.c_totalsize;
	if (alias_dist > cache_alias_dist) {
		cache_alias_dist = alias_dist;
		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
		dvma_cachealign = cache_alias_dist;
	}

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	if ((pcr & HYPERSPARC_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
	hypersparc_cache_flush_all();

	/* Enable write-back cache */
	pcr |= HYPERSPARC_PCR_CE;
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= HYPERSPARC_PCR_CM;

	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;

	/* XXX: should add support */
	if (CACHEINFO.c_hwflush)
		panic("cache_enable: can't handle 4M with hw-flush cache");

	/*
	 * Enable instruction cache and, on single-processor machines,
	 * disable `Unimplemented Flush Traps'.
	 */
	v = HYPERSPARC_ICCR_ICE | (sparc_ncpus <= 1 ? HYPERSPARC_ICCR_FTD : 0);
	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
}

void
swift_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr;

	cache_alias_dist = uimax(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & SWIFT_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & SWIFT_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
cypress_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr;
	int alias_dist;

	alias_dist = CACHEINFO.c_totalsize;
	if (alias_dist > cache_alias_dist) {
		cache_alias_dist = alias_dist;
		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
		dvma_cachealign = alias_dist;
	}

	pcr = lda(SRMMU_PCR, ASI_SRMMU);
	pcr &= ~CYPRESS_PCR_CM;

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	if ((pcr & CYPRESS_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= CYPRESS_PCR_CE;
	/* If put in write-back mode, turn it on */
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= CYPRESS_PCR_CM;
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
turbosparc_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr, pcf;
	/* External cache sizes in KB; see TurboSPARC manual */
	static const int ts_ecache_table[8] = {0,256,512,1024,512,1024,1024,0};

	cache_alias_dist = uimax(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);

	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
	if (pcf & TURBOSPARC_PCFG_SE) {
		/*
		 * Record external cache info.  The TurboSPARC's second-
		 * level cache is physically addressed/tagged and is
		 * not exposed by the PROM.
		 */
		CACHEINFO.ec_totalsize = 1024 *
			ts_ecache_table[(pcf & TURBOSPARC_PCFG_SCC)];
		CACHEINFO.ec_linesize = 32;
	}
	if (pcf & TURBOSPARC_PCFG_SNP)
		printf(": DVMA coherent ");

	CACHEINFO.c_enabled = 1;
}
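/*
 * Worked example for the table lookup above (illustrative values):
 * if the TURBOSPARC_PCFG_SCC field reads as 2, ts_ecache_table[2]
 * yields 512, so CACHEINFO.ec_totalsize is recorded as
 * 512 * 1024 = 524288 bytes of external cache.
 */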
#endif /* SUN4M || SUN4D */


/*
 * Note: on the sun4 & sun4c, the cache flush functions ignore the `ctx'
 * parameter.  This can be done since the pmap operations that need
 * to flush cache lines will already have switched to the proper
 * context to manipulate the MMU.  Hence we can avoid the overhead
 * of saving and restoring the context here.
 */

/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space (or, for hardware flush, once to each page in the
 * hardware flush space, for all cache pages).
 */
void
sun4_vcache_flush_context(int ctx)
{
	char *p;
	int i, ls;

	vcache_flush_ctx.ev_count++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	if (CACHEINFO.c_hwflush) {
		ls = PAGE_SIZE;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHCTX, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_nlines;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHCTX, 0);
	}
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 *
 * This function is only called on sun4's with 3-level MMUs; there's
 * no hw-flush space.
 */
void
sun4_vcache_flush_region(int vreg, int ctx)
{
	int i, ls;
	char *p;

	vcache_flush_reg.ev_count++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHREG, 0);
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
sun4_vcache_flush_segment(int vreg, int vseg, int ctx)
{
	int i, ls;
	char *p;

	vcache_flush_seg.ev_count++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	if (CACHEINFO.c_hwflush) {
		ls = PAGE_SIZE;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHSEG, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_nlines;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHSEG, 0);
	}
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
sun4_vcache_flush_page(int va, int ctx)
{
	int i, ls;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;
	ls = CACHEINFO.c_linesize;
	i = PAGE_SIZE >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHPG, 0);
}
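/*
 * Worked example (illustrative figures): with 4096-byte pages and a
 * 16-byte cache line (c_l2linesize == 4), the loop above issues
 * 4096 >> 4 == 256 stores into ASI_FLUSHPG space.  The hardware-
 * assisted variant below collapses those into a single store.
 */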
/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * This version uses hardware-assisted flush operation and just needs
 * one write into ASI_HWFLUSHPG space to flush all cache lines.
 */
void
sun4_vcache_flush_page_hw(int va, int ctx)
{
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;
	sta(p, ASI_HWFLUSHPG, 0);
}

/*
 * Flush a range of virtual addresses (in the current context).
 * The first byte is at (base&~PGOFSET) and the last one is just
 * before byte (base+len).
 *
 * We choose the best of (context,segment,page) here.
 */

#define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / PAGE_SIZE)

void
sun4_cache_flush(void *base, u_int len)
{
	int i, ls, baseoff;
	char *p;

	if (CACHEINFO.c_vactype == VAC_NONE)
		return;

	/*
	 * Figure out how much must be flushed.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region.  If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context.  This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 * (XXX the magic number 16 is now wrong, must review policy)
	 */
	baseoff = (int)base & PGOFSET;
	i = (baseoff + len + PGOFSET) >> PGSHIFT;

	vcache_flush_range.ev_count++;

	if (__predict_true(i < CACHE_FLUSH_MAGIC)) {
		/* cache_flush_page, for i pages */
		p = (char *)((int)base & ~baseoff);
		if (CACHEINFO.c_hwflush) {
			for (; --i >= 0; p += PAGE_SIZE)
				sta(p, ASI_HWFLUSHPG, 0);
		} else {
			ls = CACHEINFO.c_linesize;
			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
			for (; --i >= 0; p += ls)
				sta(p, ASI_FLUSHPG, 0);
		}
		return;
	}

	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (__predict_true(i == 1)) {
		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), 0);
		return;
	}

	if (HASSUN4_MMU3L) {
		baseoff = (u_int)base & RGOFSET;
		i = (baseoff + len + RGOFSET) >> RGSHIFT;
		if (i == 1)
			sun4_vcache_flush_region(VA_VREG(base), 0);
		else
			sun4_vcache_flush_context(0);
	} else
		sun4_vcache_flush_context(0);
}
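/*
 * Worked example for the page/segment trade-off above (hypothetical
 * 65536-byte cache, 4096-byte pages): CACHE_FLUSH_MAGIC is
 * 65536 / 4096 == 16, so a range touching 16 or more pages costs at
 * least as many line flushes as one whole segment, and the code falls
 * through to the segment, region and context cases.
 */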

#if defined(SUN4M) || defined(SUN4D)
#define trapoff()	do { setpsr(getpsr() & ~PSR_ET); } while(0)
#define trapon()	do { setpsr(getpsr() | PSR_ET); } while(0)
/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space.
 */
void
srmmu_vcache_flush_context(int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_ctx.ev_count++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFC, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 */
void
srmmu_vcache_flush_region(int vreg, int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_reg.ev_count++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFR, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
srmmu_vcache_flush_segment(int vreg, int vseg, int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_seg.ev_count++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFS, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
srmmu_vcache_flush_page(int va, int ctx)
{
	int i, ls, octx;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;

	/*
	 * XXX - if called early during bootstrap, we don't have the cache
	 * info yet.  Make up a cache line size (double-word aligned)
	 */
	if ((ls = CACHEINFO.c_linesize) == 0)
		ls = 8;
	i = PAGE_SIZE;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; i > 0; p += ls, i -= ls)
		sta(p, ASI_IDCACHELFP, 0);
#if defined(MULTIPROCESSOR)
	/*
	 * The page flush operation will have caused an MMU table walk
	 * on Hypersparc because the cache is physically tagged.  Since
	 * the pmap functions will not always cross-flush it in the MP
	 * case (because the context may not be active on this CPU) we
	 * flush the TLB entry now.
	 */
	/*if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) -- more work than it's worth */
		sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);

#endif
	setcontext4m(octx);
	trapon();
}

/*
 * Flush entire cache.
 */
void
srmmu_cache_flush_all(void)
{

	srmmu_vcache_flush_context(0);
}

void
srmmu_vcache_flush_range(int va, int len, int ctx)
{
	int i, ls, offset;
	char *p;
	int octx;

	/*
	 * XXX - if called early during bootstrap, we don't have the cache
	 * info yet.  Make up a cache line size (double-word aligned)
	 */
	if ((ls = CACHEINFO.c_linesize) == 0)
		ls = 8;

	vcache_flush_range.ev_count++;

	/* Compute # of cache lines covered by this range */
	offset = va & (ls - 1);
	i = len + offset;
	p = (char *)(va & ~(ls - 1));

	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; i > 0; p += ls, i -= ls)
		sta(p, ASI_IDCACHELFP, 0);

#if defined(MULTIPROCESSOR)
	if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) {
		/*
		 * See hypersparc comment in srmmu_vcache_flush_page().
		 */
		offset = va & PGOFSET;
		i = (offset + len + PGOFSET) >> PGSHIFT;

		va = va & ~PGOFSET;
		for (; --i >= 0; va += PAGE_SIZE)
			sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
	}
#endif
	setcontext4m(octx);
	trapon();
	return;
}
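/*
 * Worked example for the line loop in srmmu_vcache_flush_range()
 * above (illustrative numbers): for va == 0x1004, len == 0x20 and a
 * 16-byte line, offset is 4 and i starts at 0x24, so the loop flushes
 * the three lines at 0x1000, 0x1010 and 0x1020, i.e. every line the
 * range [0x1004, 0x1023] touches.
 */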

/*
 * Flush a range of virtual addresses (in the current context).
 *
 * We choose the best of (context,segment,page) here.
 */

void
srmmu_cache_flush(void *base, u_int len)
{
	int ctx = getcontext4m();
	int i, baseoff;


	/*
	 * Figure out the most efficient way to flush.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region.  If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context.  This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 */

	if (__predict_true(len < CACHEINFO.c_totalsize)) {
#if defined(MULTIPROCESSOR)
		FXCALL3(cpuinfo.sp_vcache_flush_range,
			cpuinfo.ft_vcache_flush_range,
			(int)base, len, ctx, CPUSET_ALL);
#else
		cpuinfo.sp_vcache_flush_range((int)base, len, ctx);
#endif
		return;
	}

	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (__predict_true(i == 1)) {
#if defined(MULTIPROCESSOR)
		FXCALL3(cpuinfo.sp_vcache_flush_segment,
			cpuinfo.ft_vcache_flush_segment,
			VA_VREG(base), VA_VSEG(base), ctx, CPUSET_ALL);
#else
		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), ctx);
#endif
		return;
	}

	baseoff = (u_int)base & RGOFSET;
	i = (baseoff + len + RGOFSET) >> RGSHIFT;
	while (i--) {
#if defined(MULTIPROCESSOR)
		FXCALL2(cpuinfo.sp_vcache_flush_region,
			cpuinfo.ft_vcache_flush_region,
			VA_VREG(base), ctx, CPUSET_ALL);
#else
		srmmu_vcache_flush_region(VA_VREG(base), ctx);
#endif
		base = ((char *)base + NBPRG);
	}
}
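/*
 * Worked example for the region loop in srmmu_cache_flush() above
 * (assuming the usual 16 MB SRMMU region size): a 20 MB flush starting
 * on a region boundary gives i == 2, so the loop issues two region
 * flushes, advancing base by NBPRG between them.
 */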

int ms1_cacheflush_magic = 0;
#define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic

void
ms1_cache_flush(void *base, u_int len)
{

	/*
	 * Although physically tagged, we still need to flush the
	 * data cache after (if we have a write-through cache) or before
	 * (in case of write-back caches) DMA operations.
	 */

#if MS1_CACHEFLUSH_MAGIC
	if (len <= MS1_CACHEFLUSH_MAGIC) {
		/*
		 * If the range to be flushed is sufficiently small,
		 * invalidate the covered cache lines by hand.
		 *
		 * The MicroSPARC I has a direct-mapped virtually addressed
		 * physically tagged data cache which is organised as
		 * 128 lines of 16 bytes.  Virtual address bits [4-10]
		 * select the cache line.  The cache tags are accessed
		 * through the standard DCACHE control space using the
		 * same address bits as those used to select the cache
		 * line in the virtual address.
		 *
		 * Note: we don't bother to compare the actual tags
		 * since that would require looking up physical addresses.
		 *
		 * The format of the tags we read from ASI_DCACHE control
		 * space is:
		 *
		 *	31       27 26            11 10        1  0
		 *	+---------+----------------+-----------+--+
		 *	|   xxx   |   PA[26-11]    |    xxx    | V|
		 *	+---------+----------------+-----------+--+
		 *
		 *	PA: bits 11-26 of the physical address
		 *	V:  line valid bit
		 */
		int tagaddr = ((u_int)base & 0x7f0);

		len = roundup(len, 16);
		while (len != 0) {
			int tag = lda(tagaddr, ASI_DCACHETAG);
			if ((tag & 1) == 1) {
				/* Mark this cache line invalid */
				sta(tagaddr, ASI_DCACHETAG, 0);
			}
			len -= 16;
			tagaddr = (tagaddr + 16) & 0x7f0;
		}
	} else
#endif
		/* Flush entire data cache */
		sta(0, ASI_DCACHECLR, 0);
}


/*
 * Flush entire cache.
 */
void
ms1_cache_flush_all(void)
{

	/* Flash-clear both caches */
	sta(0, ASI_ICACHECLR, 0);
	sta(0, ASI_DCACHECLR, 0);
}

void
hypersparc_cache_flush_all(void)
{

	srmmu_vcache_flush_context(getcontext4m());
	/* Flush instruction cache */
	hypersparc_pure_vcache_flush();
}

void
cypress_cache_flush_all(void)
{
	char *p;
	int i, ls;

	/* Fill the cache with known read-only content */
	p = (char *)kernel_text;
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	for (; --i >= 0; p += ls)
		(*(volatile char *)p);
}
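/*
 * viking_cache_flush() below is deliberately empty: the Viking's
 * caches are physically indexed and physically tagged, so (presumably,
 * given the physical-cache handling elsewhere in this file) there are
 * no virtual-address aliases to flush here.
 */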

void
viking_cache_flush(void *base, u_int len)
{
}

void
viking_pcache_flush_page(paddr_t pa, int invalidate_only)
{
	int set, i;

	/*
	 * The viking's on-chip data cache is 4-way set associative,
	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
	 * Note that one 4096 byte page exactly covers all 128 sets
	 * in the cache.
	 */
	if (invalidate_only) {
		u_int pa_tag = (pa >> 12);
		u_int tagaddr;
		uint64_t tag;

		/*
		 * Loop over all sets and invalidate all entries tagged
		 * with the given physical address by resetting the cache
		 * tag in ASI_DCACHETAG control space.
		 *
		 * The address format for accessing a tag is:
		 *
		 *	31 30 29 28 27 26 25      12 11      5 4  3 2   0
		 *	+-----+-----+------+----//----+--------+----+----+
		 *	| type| xxx | line |   xxx    |  set   | xx | 0  |
		 *	+-----+-----+------+----//----+--------+----+----+
		 *
		 *	set:  the cache set tag to be read (0-127)
		 *	line: the line within the set (0-3)
		 *	type: 1: read set tag; 2: read physical tag
		 *
		 * The (type 2) tag read from this address is a 64-bit word
		 * formatted as follows:
		 *
		 *	63     57 56 55    49 48 47    41 40 39      24 23         0
		 *	+--------+-+--------+-+--------+-+------------+------------+
		 *	|  xxx   |V|  xxx   |D|  xxx   |S|    xxx     | PA[35-12]  |
		 *	+--------+-+--------+-+--------+-+------------+------------+
		 *
		 *	PA: bits 12-35 of the physical address
		 *	S:  line shared bit (not present on SuperSPARC-II)
		 *	D:  line dirty bit (not present on SuperSPARC-II)
		 *	V:  line valid bit
		 */

#define VIKING_DCACHETAG_S	0x0000010000000000ULL	/* line shared bit */
#define VIKING_DCACHETAG_D	0x0001000000000000ULL	/* line dirty bit */
#define VIKING_DCACHETAG_V	0x0100000000000000ULL	/* line valid bit */
#define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffULL	/* PA tag field */

		for (set = 0; set < 128; set++) {
			/* Set set number and access type */
			tagaddr = (set << 5) | (2 << 30);

			/* Examine the tag for each line in the set */
			for (i = 0 ; i < 4; i++) {
				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
				/*
				 * If this is a valid tag and the PA field
				 * matches clear the tag.
				 */
				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
				    (tag & VIKING_DCACHETAG_V) != 0)
					stda(tagaddr | (i << 26),
					     ASI_DCACHETAG, 0);
			}
		}

	} else {
		/*
		 * Force the cache to validate its backing memory
		 * by displacing all cache lines with known read-only
		 * content from the start of kernel text.
		 *
		 * Note that this thrashes the entire cache. However,
		 * we currently only need to call upon this code
		 * once at boot time.
		 */
		for (set = 0; set < 128; set++) {
			int *v = (int *)(kernel_text + (set << 5));

			/*
			 * We need to read (2*associativity-1) different
			 * locations to be sure to displace the entire set.
			 */
			i = 2 * 4 - 1;
			while (i--) {
				(*(volatile int *)v);
				v += 4096;
			}
		}
	}
}
#endif /* SUN4M || SUN4D */


#if defined(MULTIPROCESSOR)
/*
 * Cache flushing on multi-processor systems involves sending
 * inter-processor messages to flush the cache on each module.
 *
 * The current context of the originating processor is passed in the
 * message.  This assumes the allocation of CPU contexts is a global
 * operation (remember that the actual context tables for the CPUs
 * are distinct).
 */

void
smp_vcache_flush_page(int va, int ctx)
{

	FXCALL2(cpuinfo.sp_vcache_flush_page, cpuinfo.ft_vcache_flush_page,
		va, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_segment(int vr, int vs, int ctx)
{

	FXCALL3(cpuinfo.sp_vcache_flush_segment, cpuinfo.ft_vcache_flush_segment,
		vr, vs, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_region(int vr, int ctx)
{

	FXCALL2(cpuinfo.sp_vcache_flush_region, cpuinfo.ft_vcache_flush_region,
		vr, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_context(int ctx)
{

	FXCALL1(cpuinfo.sp_vcache_flush_context, cpuinfo.ft_vcache_flush_context,
		ctx, CPUSET_ALL);
}
#endif /* MULTIPROCESSOR */