/*	$NetBSD: cache_sh4.c,v 1.25 2020/07/25 23:38:48 uwe Exp $	*/

/*-
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by UCHIYAMA Yasushi.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_sh4.c,v 1.25 2020/07/25 23:38:48 uwe Exp $");

#include "opt_cache.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sh3/cache.h>
#include <sh3/cache_sh4.h>
#include <sh3/vmparam.h>

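/* Round/truncate addresses to the 32-byte SH-4 cache line size. */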
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

void sh4_icache_sync_all(void);
void sh4_icache_sync_range(vaddr_t, vsize_t);
void sh4_icache_sync_range_index(vaddr_t, vsize_t);
void sh4_dcache_wbinv_all(void);
void sh4_dcache_wbinv_range(vaddr_t, vsize_t);
void sh4_dcache_wbinv_range_index(vaddr_t, vsize_t);
void sh4_dcache_inv_range(vaddr_t, vsize_t);
void sh4_dcache_wb_range(vaddr_t, vsize_t);

/* EMODE */
void sh4_emode_icache_sync_all(void);
void sh4_emode_icache_sync_range_index(vaddr_t, vsize_t);
void sh4_emode_dcache_wbinv_all(void);
void sh4_emode_dcache_wbinv_range_index(vaddr_t, vsize_t);

/* Must be inlined because we "call" them while running on P2 */
static inline void cache_sh4_op_line_32(vaddr_t, vaddr_t, uint32_t,
    uint32_t) __attribute__((always_inline));
static inline void cache_sh4_op_8lines_32(vaddr_t, vaddr_t, uint32_t,
    uint32_t) __attribute__((always_inline));
static inline void cache_sh4_emode_op_line_32(vaddr_t, vaddr_t,
    uint32_t, uint32_t, uint32_t) __attribute__((always_inline));
static inline void cache_sh4_emode_op_8lines_32(vaddr_t, vaddr_t,
    uint32_t, uint32_t, uint32_t) __attribute__((always_inline));


void
sh4_cache_config(void)
{
	int icache_size;
	int dcache_size;
	int ways;
	uint32_t r;

	/* Determine cache size */
	switch (cpu_product) {
	default:
		/* FALLTHROUGH */
	case CPU_PRODUCT_7750:
	case CPU_PRODUCT_7750S:
	case CPU_PRODUCT_7751:
#if defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
#endif
		icache_size = SH4_ICACHE_SIZE;
		dcache_size = SH4_DCACHE_SIZE;
		ways = 1;
		r = SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;

#if !defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
		icache_size = SH4_EMODE_ICACHE_SIZE;
		dcache_size = SH4_EMODE_DCACHE_SIZE;
		ways = 2;
		r = SH4_CCR_EMODE|SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;
#endif

	/*
	 * The ST40 cache sizes can be customized for each product.
	 * Refer to product-specific documentation for the cache sizes.
	 */
	case CPU_PRODUCT_STX7105:
		icache_size = 32 * 1024;
		dcache_size = 32 * 1024;
		ways = 2;
		r = SH4_CCR_EMODE|SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;
	}
#if defined(SH4_CACHE_DISABLE_ICACHE)
	r &= ~SH4_CCR_ICE;
#endif
#if defined(SH4_CACHE_DISABLE_DCACHE)
	r &= ~SH4_CCR_OCE;
#endif
#if defined(SH4_CACHE_WB_U0_P0_P3)
	r &= ~SH4_CCR_WT;
#endif
#if defined(SH4_CACHE_WB_P1)
	r |= SH4_CCR_CB;
#endif

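	/*
	 * CCR has to be written while running from P2 (uncached).
	 * Flush the caches, invalidate them, then install the new mode.
	 */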
	RUN_P2;
	if (r & SH4_CCR_EMODE)
		SH4_EMODE_CACHE_FLUSH();
	else
		SH4_CACHE_FLUSH();
	_reg_write_4(SH4_CCR, SH4_CCR_ICI|SH4_CCR_OCI);
	_reg_write_4(SH4_CCR, r);
	RUN_P1;

	r = _reg_read_4(SH4_CCR);

	sh_cache_unified = 0;
	sh_cache_enable_icache = (r & SH4_CCR_ICE);
	sh_cache_enable_dcache = (r & SH4_CCR_OCE);
	sh_cache_ways = ways;
	sh_cache_line_size = SH4_CACHE_LINESZ;
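	/*
	 * A D-cache way is larger than a page, so virtually indexed
	 * aliases are possible; these masks cover the index bits above
	 * the page offset so the VM code can detect and avoid them.
	 */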
	sh_cache_alias_mask = (dcache_size / ways - 1) & ~PAGE_MASK;
	sh_cache_prefer_mask = (dcache_size / ways - 1);
	sh_cache_write_through_p0_u0_p3 = (r & SH4_CCR_WT);
	sh_cache_write_through_p1 = !(r & SH4_CCR_CB);
	sh_cache_write_through = sh_cache_write_through_p0_u0_p3 &&
	    sh_cache_write_through_p1;
	sh_cache_ram_mode = (r & SH4_CCR_ORA);
	sh_cache_index_mode_icache = (r & SH4_CCR_IIX);
	sh_cache_index_mode_dcache = (r & SH4_CCR_OIX);

	sh_cache_size_dcache = dcache_size;
	if (sh_cache_ram_mode)
		sh_cache_size_dcache /= 2;
	sh_cache_size_icache = icache_size;

	sh_cache_ops._icache_sync_all		= sh4_icache_sync_all;
	sh_cache_ops._icache_sync_range		= sh4_icache_sync_range;
	sh_cache_ops._icache_sync_range_index	= sh4_icache_sync_range_index;

	sh_cache_ops._dcache_wbinv_all		= sh4_dcache_wbinv_all;
	sh_cache_ops._dcache_wbinv_range	= sh4_dcache_wbinv_range;
	sh_cache_ops._dcache_wbinv_range_index	= sh4_dcache_wbinv_range_index;
	sh_cache_ops._dcache_inv_range		= sh4_dcache_inv_range;
	sh_cache_ops._dcache_wb_range		= sh4_dcache_wb_range;

	switch (cpu_product) {
	case CPU_PRODUCT_7750:	/* FALLTHROUGH */
	case CPU_PRODUCT_7750S:
		/* memory mapped d$ can only be accessed from p2 */
		sh_cache_ops._dcache_wbinv_all
			= (void *)SH3_P1SEG_TO_P2SEG(sh4_dcache_wbinv_all);
		sh_cache_ops._dcache_wbinv_range_index
			= (void *)SH3_P1SEG_TO_P2SEG(sh4_dcache_wbinv_range_index);
		break;

#if !defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
		if (!(r & SH4_CCR_EMODE)) {
			break;
		}
		sh_cache_ops._icache_sync_all = sh4_emode_icache_sync_all;
		sh_cache_ops._icache_sync_range_index =
		    sh4_emode_icache_sync_range_index;
		sh_cache_ops._dcache_wbinv_all = sh4_emode_dcache_wbinv_all;
		sh_cache_ops._dcache_wbinv_range_index =
		    sh4_emode_dcache_wbinv_range_index;
		break;
#endif
	}
}

/*
 * cache_sh4_op_line_32: (index-operation)
 *
 *	Clear the specified bits on a single 32-byte cache line.
 */
static inline void
cache_sh4_op_line_32(vaddr_t va, vaddr_t base, uint32_t mask, uint32_t bits)
{
	vaddr_t cca;

	cca = base | (va & mask);
	_reg_bclr_4(cca, bits);
}

/*
 * cache_sh4_op_8lines_32: (index-operation)
 *
 *	Clear the specified bits on 8 32-byte cache lines.
 */
static inline void
cache_sh4_op_8lines_32(vaddr_t va, vaddr_t base, uint32_t mask, uint32_t bits)
{
	volatile uint32_t *cca = (volatile uint32_t *)
	    (base | (va & mask));

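	/* Each stride of 8 words is 32 bytes, i.e. one cache line. */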
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;
}

void
sh4_icache_sync_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_ICACHE_SIZE;

	/* d$ index ops must be called via P2 on 7750 and 7750S */
	(*sh_cache_ops._dcache_wbinv_all)();

	RUN_P2;
	while (va < eva) {
		cache_sh4_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_icache_sync_range(vaddr_t va, vsize_t sz)
{
	vaddr_t ccia;
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	sh4_dcache_wbinv_range(va, (eva - va));

	RUN_P2;
	while (va < eva) {
		/* CCR.IIX has no effect on this entry specification */
		ccia = SH4_CCIA | CCIA_A | (va & CCIA_ENTRY_MASK);
		_reg_write_4(ccia, va & CCIA_TAGADDR_MASK); /* V = 0 */
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_icache_sync_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	/* d$ index ops must be called via P2 on 7750 and 7750S */
	(*sh_cache_ops._dcache_wbinv_range_index)(va, eva - va);

	RUN_P2;
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_op_line_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_dcache_wbinv_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_DCACHE_SIZE;

	/* RUN_P2; */ /* called via P2 address if necessary */
	while (va < eva) {
		cache_sh4_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

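/*
 * Write back and invalidate the D-cache lines covering [va, va + sz)
 * with "ocbp" (purge), which operates directly on virtual addresses.
 */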
void
sh4_dcache_wbinv_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while (va < eva) {
		__asm volatile("ocbp @%0" : : "r"(va));
		va += 32;
	}
}

void
sh4_dcache_wbinv_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	/* RUN_P2; */ /* called via P2 address if necessary */
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_op_line_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

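/*
 * Invalidate the D-cache lines covering [va, va + sz) with "ocbi";
 * dirty data in those lines is discarded, not written back.
 */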
void
sh4_dcache_inv_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while (va < eva) {
		__asm volatile("ocbi @%0" : : "r"(va));
		va += 32;
	}
}

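/*
 * Write back the D-cache lines covering [va, va + sz) with "ocbwb";
 * the lines stay valid in the cache.
 */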
void
sh4_dcache_wb_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while (va < eva) {
		__asm volatile("ocbwb @%0" : : "r"(va));
		va += 32;
	}
}

/*
 * EMODE operation
 */
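/*
 * In EMODE the caches are 2-way set associative.  The way is selected
 * by a single bit of the memory-mapped array address: bit 13 for the
 * I-cache address array and bit 14 for the D-cache address array,
 * passed to the helpers below as "way_shift".
 */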
/*
 * cache_sh4_emode_op_line_32: (index-operation)
 *
 *	Clear the specified bits on a single 32-byte cache line, both ways.
 */
static inline void
cache_sh4_emode_op_line_32(vaddr_t va, vaddr_t base, uint32_t mask,
    uint32_t bits, uint32_t way_shift)
{
	vaddr_t cca;

	/* extract entry # */
	va &= mask;

	/* operate for each way */
	cca = base | (0 << way_shift) | va;
	_reg_bclr_4(cca, bits);

	cca = base | (1 << way_shift) | va;
	_reg_bclr_4(cca, bits);
}

/*
 * cache_sh4_emode_op_8lines_32: (index-operation)
 *
 *	Clear the specified bits on 8 32-byte cache lines, both ways.
 */
static inline void
cache_sh4_emode_op_8lines_32(vaddr_t va, vaddr_t base, uint32_t mask,
    uint32_t bits, uint32_t way_shift)
{
	volatile uint32_t *cca;

	/* extract entry # */
	va &= mask;

	/* operate for each way */
	cca = (volatile uint32_t *)(base | (0 << way_shift) | va);
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;

	cca = (volatile uint32_t *)(base | (1 << way_shift) | va);
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;
}

void
sh4_emode_icache_sync_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_EMODE_ICACHE_SIZE;

	sh4_emode_dcache_wbinv_all();

	RUN_P2;
	while (va < eva) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_emode_icache_sync_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	sh4_emode_dcache_wbinv_range_index(va, eva - va);

	RUN_P2;
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_emode_op_line_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_emode_dcache_wbinv_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_EMODE_DCACHE_SIZE;

	while (va < eva) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32 * 8;
	}
}

void
sh4_emode_dcache_wbinv_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while ((eva - va) >= (8 * 32)) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_emode_op_line_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32;
	}
}