/*	$NetBSD: cache_r5k.c,v 1.21 2020/06/14 15:12:56 tsutsui Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_r5k.c,v 1.21 2020/06/14 15:12:56 tsutsui Exp $");

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/cache_r5k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32 bytes/line),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity. If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect. These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
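
/*
 * The r4600v1_* routines below work around the v1.x bug by issuing four
 * nops before each Hit CACHE op; the r4600v2_* routines work around the
 * v2.0 bug with an uncached load from MIPS_PHYS_TO_KSEG1(0) to drain the
 * refill buffer before each batch of CACHE ops.  Both run with interrupts
 * disabled.
 */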

#define	round_line16(x)		round_line(x, 16)
#define	trunc_line16(x)		trunc_line(x, 16)
#define	round_line32(x)		round_line(x, 32)
#define	trunc_line32(x)		trunc_line(x, 32)
#define	round_line(x,n)		(((x) + (register_t)(n) - 1) & -(register_t)(n))
#define	trunc_line(x,n)		((x) & -(register_t)(n))
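
/*
 * For example, round_line32(0x1005) == 0x1020 and trunc_line32(0x1005)
 * == 0x1000; since line sizes are powers of two, -(register_t)(n) is
 * simply the mask that clears the line-offset bits.
 */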

__asm(".set mips3");

void
r5k_picache_sync_all(void)
{
	struct mips_cache_info * const mci = &mips_cache_info;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */
	mips_intern_dcache_sync_all();
	__asm volatile("sync");
	mips_intern_icache_sync_range_index(MIPS_KSEG0_START,
	    mci->mci_picache_size);
}

void
r5k_picache_sync_range(register_t va, vsize_t size)
{

	mips_intern_dcache_sync_range(va, size);
	mips_intern_icache_sync_range(va, size);
}

void
r5k_picache_sync_range_index(vaddr_t va, vsize_t size)
{
	struct mips_cache_info * const mci = &mips_cache_info;
	const size_t ways = mci->mci_picache_ways;
	const size_t line_size = mci->mci_picache_line_size;
	const size_t way_size = mci->mci_picache_way_size;
	const size_t way_mask = way_size - 1;
	vaddr_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & way_mask);
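	/*
	 * For example (illustrative numbers), with a 16KB way
	 * (way_mask == 0x3fff) a va of 0x00412345 becomes
	 * MIPS_PHYS_TO_KSEG0(0x2345), i.e. 0x80002345.
	 */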

	eva = round_line(va + size, line_size);
	va = trunc_line(va, line_size);
	size = eva - va;

	/*
	 * If we are going to flush more than is in a way (or the stride
	 * needed for that way), we are flushing everything.
	 */
	if (size >= way_size) {
		r5k_picache_sync_all();
		return;
	}

	for (size_t way = 0; way < ways; way++) {
		mips_intern_dcache_sync_range_index(va, size);
		mips_intern_icache_sync_range_index(va, size);
		va += way_size;
		eva += way_size;
	}
}

void
r5k_pdcache_wbinv_all(void)
{
	struct mips_cache_info * const mci = &mips_cache_info;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */
	mips_intern_pdcache_wbinv_range_index(MIPS_KSEG0_START,
	    mci->mci_pdcache_size);
}

void
r5k_pdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{
	struct mips_cache_info * const mci = &mips_cache_info;
	const size_t ways = mci->mci_pdcache_ways;
	const size_t line_size = mci->mci_pdcache_line_size;
	const vaddr_t way_size = mci->mci_pdcache_way_size;
	const vaddr_t way_mask = way_size - 1;
	vaddr_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & way_mask);
	eva = round_line(va + size, line_size);
	va = trunc_line(va, line_size);
	size = eva - va;

	/*
	 * If we are going to flush more than is in a way, we are flushing
	 * everything.
	 */
	if (size >= way_size) {
		mips_intern_pdcache_wbinv_range_index(MIPS_KSEG0_START,
		    mci->mci_pdcache_size);
		return;
	}

	/*
	 * Invalidate each way.  If the address range wraps past the end of
	 * the way, we will be invalidating in two ways but eventually things
	 * work out since the last way will wrap into the first way.
	 */
	for (size_t way = 0; way < ways; way++) {
		mips_intern_pdcache_wbinv_range_index(va, size);
		va += way_size;
		eva += way_size;
	}
}

void
r4600v1_pdcache_wbinv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if (eva - va >= mips_cache_info.mci_pdcache_size) {
		r5k_pdcache_wbinv_all();
		return;
	}

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm volatile("nop; nop; nop; nop");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
vr4131v1_pdcache_wbinv_range_16(register_t va, vsize_t size)
{
	register_t eva = round_line16(va + size);

	va = trunc_line16(va);

	for (; (eva - va) >= (32 * 16); va += (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	for (; va < eva; va += 16) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}
}

void
r4600v1_pdcache_inv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; va < eva; va += 32) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * The uncached load between blasts of big cache chunks empties
	 * the data cache refill buffer, as required by the R4600 v2.0
	 * errata (see above).
	 */
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v1_pdcache_wb_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; va < eva; va += 32) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * The uncached load between blasts of big cache chunks empties
	 * the data cache refill buffer, as required by the R4600 v2.0
	 * errata (see above).
	 */
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	mips_cp0_status_write(ostatus);
}

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 */
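
/*
 * Because the secondary cache is write-through, there is never dirty data
 * to write back, so the "wbinv" routines below only need to invalidate;
 * they do so in R5K_SC_PAGESIZE chunks using the R5000 Page_Invalidate_S
 * CACHE op.
 */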

__asm(".set mips3");

void
r5k_sdcache_wbinv_all(void)
{

	r5k_sdcache_wbinv_range(MIPS_PHYS_TO_KSEG0(0),
	    mips_cache_info.mci_sdcache_size);
}

void
r5k_sdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (mips_cache_info.mci_sdcache_size - 1));
	r5k_sdcache_wbinv_range((intptr_t)va, size);
}

void
r5k_sdcache_wbinv_range(register_t va, vsize_t size)
{
	uint32_t ostatus, taglo;
	register_t eva = mips_r5k_round_page(va + size);

	va = mips_r5k_trunc_page(va);

	ostatus = mips_cp0_status_read();
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

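	/*
	 * CP0 register $28 is TagLo: save the caller's value, clear it
	 * while the page invalidates run, and restore it afterwards.
	 */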
	__asm volatile("mfc0 %0, $28" : "=r"(taglo));
	__asm volatile("mtc0 $0, $28");

	for (; va < eva; va += R5K_SC_PAGESIZE) {
		cache_op_r4k_line(va, CACHEOP_R5K_Page_Invalidate_S);
	}

	mips_cp0_status_write(ostatus);
	__asm volatile("mtc0 %0, $28; nop" :: "r"(taglo));
}
    452