ecc_plb.c revision 1.8 1 /* $NetBSD: ecc_plb.c,v 1.8 2003/07/15 02:54:44 lukem Exp $ */
2
3 /*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.8 2003/07/15 02:54:44 lukem Exp $");
40
41 #include "locators.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/device.h>
46 #include <sys/properties.h>
47
48 #include <machine/cpu.h>
49 #include <powerpc/ibm4xx/dcr405gp.h>
50 #include <powerpc/ibm4xx/dev/plbvar.h>
51
52
53 struct ecc_plb_softc {
54 struct device sc_dev;
55 u_quad_t sc_ecc_tb;
56 u_quad_t sc_ecc_iv; /* Interval */
57 u_int32_t sc_ecc_cnt;
58 u_int sc_memsize;
59 int sc_irq;
60 };
61
62 static int ecc_plbmatch(struct device *, struct cfdata *, void *);
63 static void ecc_plbattach(struct device *, struct device *, void *);
64 static void ecc_plb_deferred(struct device *);
65 static int ecc_plb_intr(void *);
66
67 CFATTACH_DECL(ecc_plb, sizeof(struct ecc_plb_softc),
68 ecc_plbmatch, ecc_plbattach, NULL, NULL);
69
70 static int ecc_plb_found;
71
72 static int
73 ecc_plbmatch(struct device *parent, struct cfdata *cf, void *aux)
74 {
75 struct plb_attach_args *paa = aux;
76
77 if (strcmp(paa->plb_name, cf->cf_name) != 0)
78 return (0);
79
80 if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
81 panic("ecc_plbmatch: wildcard IRQ not allowed");
82
83 paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
84
85 return (!ecc_plb_found);
86 }
87
88 static void
89 ecc_plbattach(struct device *parent, struct device *self, void *aux)
90 {
91 struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
92 struct plb_attach_args *paa = aux;
93 unsigned int processor_freq;
94 unsigned int memsiz;
95
96 ecc_plb_found++;
97
98 if (board_info_get("processor-frequency",
99 &processor_freq, sizeof(processor_freq)) == -1)
100 panic("no processor-frequency");
101
102 if (board_info_get("mem-size", &memsiz, sizeof(memsiz)) == -1)
103 panic("no mem-size");
104
105 printf(": ECC controller\n");
106
107 sc->sc_ecc_tb = 0;
108 sc->sc_ecc_cnt = 0;
109 sc->sc_ecc_iv = processor_freq; /* Set interval */
110 sc->sc_memsize = memsiz;
111 sc->sc_irq = paa->plb_irq;
112
113 /*
114 * Defer hooking the interrupt until all PLB devices have attached
115 * since the interrupt controller may well be one of those devices...
116 */
117 config_defer(self, ecc_plb_deferred);
118 }
119
120 static void
121 ecc_plb_deferred(struct device *self)
122 {
123 struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
124
125 intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, NULL);
126 }
127
128 /*
129 * ECC fault handler.
130 */
131 static int
132 ecc_plb_intr(void *arg)
133 {
134 struct ecc_plb_softc *sc = arg;
135 u_int32_t esr, ear;
136 int ce, ue;
137 u_quad_t tb;
138 u_long tmp, msr, dat;
139
140 /* This code needs to be improved to handle double-bit errors */
141 /* in some intelligent fashion. */
142
143 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
144 esr = mfdcr(DCR_SDRAM0_CFGDATA);
145
146 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
147 ear = mfdcr(DCR_SDRAM0_CFGDATA);
148
149 /* Always clear the error to stop the intr ASAP. */
150
151 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
152 mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
153
154 if (esr == 0x00) {
155 /* No current error. Could happen due to intr. nesting */
156 return(1);
157 }
158
159 /*
160 * Only report errors every once per second max. Do this using the TB,
161 * because the system time (via microtime) may be adjusted when the
162 * date is set and can't reliably be used to measure intervals.
163 */
164
165 asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
166 : "=r"(tb), "=r"(tmp));
167 sc->sc_ecc_cnt++;
168
169 if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
170 return(1);
171
172 ce = (esr & SDRAM0_ECCESR_CE) != 0x00;
173 ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
174
175 printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
176 "BLCE=%d%d%d%d CBE=%d%d.\n",
177 sc->sc_ecc_cnt, esr, ear,
178 (ue) ? "Uncorrectable" : "Correctable",
179 ((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
180 ((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
181 ((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
182 ((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
183 ((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
184 ((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
185 ((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
186 ((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
187 ((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
188 ((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
189
190 /* Should check for uncorrectable errors and panic... */
191
192 if (sc->sc_ecc_cnt > 1000) {
193 printf("ECC: Too many errors, recycling entire "
194 "SDRAM (size = %d).\n", sc->sc_memsize);
195
196 /*
197 * Can this code be changed to run without disabling data MMU
198 * and disabling intrs?
199 * Does kernel always map all of physical RAM VA=PA? If so,
200 * just loop over lowmem.
201 */
202 asm volatile(
203 "mfmsr %0;"
204 "li %1, 0x00;"
205 "ori %1, %1, 0x8010;"
206 "andc %1, %0, %1;"
207 "mtmsr %1;"
208 "sync;isync;"
209 "li %1, 0x00;"
210 "1:"
211 "dcbt 0, %1;"
212 "sync;isync;"
213 "lwz %2, 0(%1);"
214 "stw %2, 0(%1);"
215 "sync;isync;"
216 "dcbf 0, %1;"
217 "sync;isync;"
218 "addi %1, %1, 0x20;"
219 "addic. %3, %3, -0x20;"
220 "bge 1b;"
221 "mtmsr %0;"
222 "sync;isync;"
223 : "=&r" (msr), "=&r" (tmp), "=&r" (dat)
224 : "r" (sc->sc_memsize) : "0" );
225
226 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
227 esr = mfdcr(DCR_SDRAM0_CFGDATA);
228
229 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
230 mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
231
232 /*
233 * Correctable errors here are OK, mem should be clean now.
234 *
235 * Should check for uncorrectable errors and panic...
236 */
237 printf("ECC: Recycling complete, ESR=%x. "
238 "Checking for persistent errors.\n", esr);
239
240 asm volatile(
241 "mfmsr %0;"
242 "li %1, 0x00;"
243 "ori %1, %1, 0x8010;"
244 "andc %1, %0, %1;"
245 "mtmsr %1;"
246 "sync;isync;"
247 "li %1, 0x00;"
248 "1:"
249 "dcbt 0, %1;"
250 "sync;isync;"
251 "lwz %2, 0(%1);"
252 "stw %2, 0(%1);"
253 "sync;isync;"
254 "dcbf 0, %1;"
255 "sync;isync;"
256 "addi %1, %1, 0x20;"
257 "addic. %3, %3, -0x20;"
258 "bge 1b;"
259 "mtmsr %0;"
260 "sync;isync;"
261 : "=&r" (msr), "=&r" (tmp), "=&r" (dat)
262 : "r" (sc->sc_memsize) : "0" );
263
264 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
265 esr = mfdcr(DCR_SDRAM0_CFGDATA);
266
267 /*
268 * If esr is non zero here, we're screwed.
269 * Should check this and panic.
270 */
271 printf("ECC: Persistent error check complete, "
272 "final ESR=%x.\n", esr);
273 }
274
275 sc->sc_ecc_tb = tb;
276 sc->sc_ecc_cnt = 0;
277
278 return(1);
279 }
280