1 1.16 rin /* $NetBSD: ecc_plb.c,v 1.16 2021/02/27 20:43:58 rin Exp $ */ 2 1.1 scw 3 1.1 scw /* 4 1.1 scw * Copyright 2001 Wasabi Systems, Inc. 5 1.1 scw * All rights reserved. 6 1.1 scw * 7 1.1 scw * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc. 8 1.1 scw * 9 1.1 scw * Redistribution and use in source and binary forms, with or without 10 1.1 scw * modification, are permitted provided that the following conditions 11 1.1 scw * are met: 12 1.1 scw * 1. Redistributions of source code must retain the above copyright 13 1.1 scw * notice, this list of conditions and the following disclaimer. 14 1.1 scw * 2. Redistributions in binary form must reproduce the above copyright 15 1.1 scw * notice, this list of conditions and the following disclaimer in the 16 1.1 scw * documentation and/or other materials provided with the distribution. 17 1.1 scw * 3. All advertising materials mentioning features or use of this software 18 1.1 scw * must display the following acknowledgement: 19 1.1 scw * This product includes software developed for the NetBSD Project by 20 1.1 scw * Wasabi Systems, Inc. 21 1.1 scw * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 1.1 scw * or promote products derived from this software without specific prior 23 1.1 scw * written permission. 24 1.1 scw * 25 1.1 scw * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 1.1 scw * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 1.1 scw * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 1.1 scw * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 1.1 scw * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 1.1 scw * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 1.1 scw * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 1.1 scw * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 1.1 scw * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 1.1 scw * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 1.1 scw * POSSIBILITY OF SUCH DAMAGE. 36 1.1 scw */ 37 1.8 lukem 38 1.8 lukem #include <sys/cdefs.h> 39 1.16 rin __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.16 2021/02/27 20:43:58 rin Exp $"); 40 1.1 scw 41 1.1 scw #include "locators.h" 42 1.1 scw 43 1.1 scw #include <sys/param.h> 44 1.1 scw #include <sys/systm.h> 45 1.1 scw #include <sys/device.h> 46 1.14 matt #include <sys/cpu.h> 47 1.11 thorpej 48 1.11 thorpej #include <prop/proplib.h> 49 1.1 scw 50 1.14 matt #include <powerpc/ibm4xx/cpu.h> 51 1.12 kiyohara #include <powerpc/ibm4xx/dcr4xx.h> 52 1.1 scw #include <powerpc/ibm4xx/dev/plbvar.h> 53 1.1 scw 54 1.1 scw 55 1.1 scw struct ecc_plb_softc { 56 1.13 matt device_t sc_dev; 57 1.13 matt uint64_t sc_ecc_tb; 58 1.13 matt uint64_t sc_ecc_iv; /* Interval */ 59 1.13 matt uint32_t sc_ecc_cnt; 60 1.1 scw u_int sc_memsize; 61 1.1 scw int sc_irq; 62 1.1 scw }; 63 1.1 scw 64 1.13 matt static int ecc_plbmatch(device_t, cfdata_t, void *); 65 1.13 matt static void ecc_plbattach(device_t, device_t, void *); 66 1.13 matt static void ecc_plb_deferred(device_t); 67 1.1 scw static int ecc_plb_intr(void *); 68 1.1 scw 69 1.13 matt CFATTACH_DECL_NEW(ecc_plb, sizeof(struct ecc_plb_softc), 70 1.6 thorpej ecc_plbmatch, ecc_plbattach, NULL, NULL); 71 1.1 scw 72 1.1 scw static int ecc_plb_found; 73 1.1 scw 74 1.1 scw static int 75 1.13 matt ecc_plbmatch(device_t parent, cfdata_t cf, void *aux) 76 1.1 scw { 77 1.1 scw struct plb_attach_args *paa = aux; 78 1.1 scw 79 1.2 thorpej if (strcmp(paa->plb_name, cf->cf_name) != 0) 80 1.1 scw return (0); 81 1.1 scw 82 1.1 scw if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT) 83 1.3 provos panic("ecc_plbmatch: wildcard IRQ not allowed"); 84 1.1 scw 85 1.1 scw paa->plb_irq = cf->cf_loc[PLBCF_IRQ]; 86 1.1 scw 87 1.1 scw return (!ecc_plb_found); 88 1.1 scw } 89 1.1 scw 90 1.1 scw static void 91 1.13 matt ecc_plbattach(device_t parent, device_t self, void *aux) 92 1.1 scw { 93 1.13 matt struct ecc_plb_softc *sc = device_private(self); 94 1.1 scw struct plb_attach_args *paa = aux; 95 1.1 scw unsigned int processor_freq; 96 1.1 scw unsigned int memsiz; 97 1.11 thorpej prop_number_t pn; 98 1.1 scw 99 1.1 scw ecc_plb_found++; 100 1.1 scw 101 1.11 thorpej pn = prop_dictionary_get(board_properties, "processor-frequency"); 102 1.11 thorpej KASSERT(pn != NULL); 103 1.11 thorpej processor_freq = (unsigned int) prop_number_integer_value(pn); 104 1.11 thorpej 105 1.11 thorpej pn = prop_dictionary_get(board_properties, "mem-size"); 106 1.11 thorpej KASSERT(pn != NULL); 107 1.11 thorpej memsiz = (unsigned int) prop_number_integer_value(pn); 108 1.1 scw 109 1.13 matt aprint_normal(": ECC controller\n"); 110 1.1 scw 111 1.13 matt sc->sc_dev = self; 112 1.1 scw sc->sc_ecc_tb = 0; 113 1.1 scw sc->sc_ecc_cnt = 0; 114 1.1 scw sc->sc_ecc_iv = processor_freq; /* Set interval */ 115 1.1 scw sc->sc_memsize = memsiz; 116 1.1 scw sc->sc_irq = paa->plb_irq; 117 1.1 scw 118 1.1 scw /* 119 1.1 scw * Defer hooking the interrupt until all PLB devices have attached 120 1.1 scw * since the interrupt controller may well be one of those devices... 121 1.1 scw */ 122 1.1 scw config_defer(self, ecc_plb_deferred); 123 1.1 scw } 124 1.1 scw 125 1.1 scw static void 126 1.13 matt ecc_plb_deferred(device_t self) 127 1.1 scw { 128 1.13 matt struct ecc_plb_softc *sc = device_private(self); 129 1.1 scw 130 1.16 rin intr_establish_xname(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, 131 1.16 rin sc, device_xname(self)); 132 1.1 scw } 133 1.1 scw 134 1.1 scw /* 135 1.1 scw * ECC fault handler. 136 1.1 scw */ 137 1.1 scw static int 138 1.1 scw ecc_plb_intr(void *arg) 139 1.1 scw { 140 1.1 scw struct ecc_plb_softc *sc = arg; 141 1.1 scw u_int32_t esr, ear; 142 1.15 martin int ue; 143 1.1 scw u_quad_t tb; 144 1.1 scw u_long tmp, msr, dat; 145 1.1 scw 146 1.1 scw /* This code needs to be improved to handle double-bit errors */ 147 1.1 scw /* in some intelligent fashion. */ 148 1.1 scw 149 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 150 1.1 scw esr = mfdcr(DCR_SDRAM0_CFGDATA); 151 1.1 scw 152 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR); 153 1.1 scw ear = mfdcr(DCR_SDRAM0_CFGDATA); 154 1.1 scw 155 1.1 scw /* Always clear the error to stop the intr ASAP. */ 156 1.1 scw 157 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 158 1.1 scw mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff); 159 1.1 scw 160 1.1 scw if (esr == 0x00) { 161 1.1 scw /* No current error. Could happen due to intr. nesting */ 162 1.1 scw return(1); 163 1.1 scw } 164 1.1 scw 165 1.1 scw /* 166 1.1 scw * Only report errors every once per second max. Do this using the TB, 167 1.1 scw * because the system time (via microtime) may be adjusted when the 168 1.1 scw * date is set and can't reliably be used to measure intervals. 169 1.1 scw */ 170 1.1 scw 171 1.10 perry __asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b" 172 1.1 scw : "=r"(tb), "=r"(tmp)); 173 1.1 scw sc->sc_ecc_cnt++; 174 1.1 scw 175 1.1 scw if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv) 176 1.1 scw return(1); 177 1.1 scw 178 1.1 scw ue = (esr & SDRAM0_ECCESR_UE) != 0x00; 179 1.1 scw 180 1.1 scw printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d " 181 1.1 scw "BLCE=%d%d%d%d CBE=%d%d.\n", 182 1.1 scw sc->sc_ecc_cnt, esr, ear, 183 1.1 scw (ue) ? "Uncorrectable" : "Correctable", 184 1.1 scw ((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00), 185 1.1 scw ((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00), 186 1.1 scw ((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00), 187 1.1 scw ((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00), 188 1.1 scw ((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00), 189 1.1 scw ((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00), 190 1.1 scw ((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00), 191 1.1 scw ((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00), 192 1.1 scw ((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00), 193 1.1 scw ((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00)); 194 1.1 scw 195 1.1 scw /* Should check for uncorrectable errors and panic... */ 196 1.1 scw 197 1.1 scw if (sc->sc_ecc_cnt > 1000) { 198 1.1 scw printf("ECC: Too many errors, recycling entire " 199 1.1 scw "SDRAM (size = %d).\n", sc->sc_memsize); 200 1.1 scw 201 1.1 scw /* 202 1.1 scw * Can this code be changed to run without disabling data MMU 203 1.1 scw * and disabling intrs? 204 1.1 scw * Does kernel always map all of physical RAM VA=PA? If so, 205 1.1 scw * just loop over lowmem. 206 1.1 scw */ 207 1.10 perry __asm volatile( 208 1.1 scw "mfmsr %0;" 209 1.1 scw "li %1, 0x00;" 210 1.1 scw "ori %1, %1, 0x8010;" 211 1.1 scw "andc %1, %0, %1;" 212 1.1 scw "mtmsr %1;" 213 1.1 scw "sync;isync;" 214 1.1 scw "li %1, 0x00;" 215 1.1 scw "1:" 216 1.1 scw "dcbt 0, %1;" 217 1.1 scw "sync;isync;" 218 1.1 scw "lwz %2, 0(%1);" 219 1.1 scw "stw %2, 0(%1);" 220 1.1 scw "sync;isync;" 221 1.1 scw "dcbf 0, %1;" 222 1.1 scw "sync;isync;" 223 1.1 scw "addi %1, %1, 0x20;" 224 1.1 scw "addic. %3, %3, -0x20;" 225 1.1 scw "bge 1b;" 226 1.1 scw "mtmsr %0;" 227 1.1 scw "sync;isync;" 228 1.1 scw : "=&r" (msr), "=&r" (tmp), "=&r" (dat) 229 1.1 scw : "r" (sc->sc_memsize) : "0" ); 230 1.1 scw 231 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 232 1.1 scw esr = mfdcr(DCR_SDRAM0_CFGDATA); 233 1.1 scw 234 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 235 1.1 scw mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff); 236 1.1 scw 237 1.1 scw /* 238 1.1 scw * Correctable errors here are OK, mem should be clean now. 239 1.1 scw * 240 1.1 scw * Should check for uncorrectable errors and panic... 241 1.1 scw */ 242 1.1 scw printf("ECC: Recycling complete, ESR=%x. " 243 1.1 scw "Checking for persistent errors.\n", esr); 244 1.1 scw 245 1.10 perry __asm volatile( 246 1.1 scw "mfmsr %0;" 247 1.1 scw "li %1, 0x00;" 248 1.1 scw "ori %1, %1, 0x8010;" 249 1.1 scw "andc %1, %0, %1;" 250 1.1 scw "mtmsr %1;" 251 1.1 scw "sync;isync;" 252 1.1 scw "li %1, 0x00;" 253 1.1 scw "1:" 254 1.1 scw "dcbt 0, %1;" 255 1.1 scw "sync;isync;" 256 1.1 scw "lwz %2, 0(%1);" 257 1.1 scw "stw %2, 0(%1);" 258 1.1 scw "sync;isync;" 259 1.1 scw "dcbf 0, %1;" 260 1.1 scw "sync;isync;" 261 1.1 scw "addi %1, %1, 0x20;" 262 1.1 scw "addic. %3, %3, -0x20;" 263 1.1 scw "bge 1b;" 264 1.1 scw "mtmsr %0;" 265 1.1 scw "sync;isync;" 266 1.1 scw : "=&r" (msr), "=&r" (tmp), "=&r" (dat) 267 1.1 scw : "r" (sc->sc_memsize) : "0" ); 268 1.1 scw 269 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 270 1.1 scw esr = mfdcr(DCR_SDRAM0_CFGDATA); 271 1.1 scw 272 1.1 scw /* 273 1.1 scw * If esr is non zero here, we're screwed. 274 1.1 scw * Should check this and panic. 275 1.1 scw */ 276 1.1 scw printf("ECC: Persistent error check complete, " 277 1.1 scw "final ESR=%x.\n", esr); 278 1.1 scw } 279 1.1 scw 280 1.1 scw sc->sc_ecc_tb = tb; 281 1.1 scw sc->sc_ecc_cnt = 0; 282 1.1 scw 283 1.1 scw return(1); 284 1.1 scw } 285