ecc_plb.c revision 1.15 1 1.15 martin /* $NetBSD: ecc_plb.c,v 1.15 2014/02/25 14:09:13 martin Exp $ */
2 1.1 scw
3 1.1 scw /*
4 1.1 scw * Copyright 2001 Wasabi Systems, Inc.
5 1.1 scw * All rights reserved.
6 1.1 scw *
7 1.1 scw * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
8 1.1 scw *
9 1.1 scw * Redistribution and use in source and binary forms, with or without
10 1.1 scw * modification, are permitted provided that the following conditions
11 1.1 scw * are met:
12 1.1 scw * 1. Redistributions of source code must retain the above copyright
13 1.1 scw * notice, this list of conditions and the following disclaimer.
14 1.1 scw * 2. Redistributions in binary form must reproduce the above copyright
15 1.1 scw * notice, this list of conditions and the following disclaimer in the
16 1.1 scw * documentation and/or other materials provided with the distribution.
17 1.1 scw * 3. All advertising materials mentioning features or use of this software
18 1.1 scw * must display the following acknowledgement:
19 1.1 scw * This product includes software developed for the NetBSD Project by
20 1.1 scw * Wasabi Systems, Inc.
21 1.1 scw * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 1.1 scw * or promote products derived from this software without specific prior
23 1.1 scw * written permission.
24 1.1 scw *
25 1.1 scw * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 1.1 scw * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 1.1 scw * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 1.1 scw * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 1.1 scw * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 1.1 scw * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 1.1 scw * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 1.1 scw * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 1.1 scw * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 1.1 scw * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 1.1 scw * POSSIBILITY OF SUCH DAMAGE.
36 1.1 scw */
37 1.8 lukem
38 1.8 lukem #include <sys/cdefs.h>
39 1.15 martin __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.15 2014/02/25 14:09:13 martin Exp $");
40 1.1 scw
41 1.1 scw #include "locators.h"
42 1.1 scw
43 1.1 scw #include <sys/param.h>
44 1.1 scw #include <sys/systm.h>
45 1.1 scw #include <sys/device.h>
46 1.14 matt #include <sys/cpu.h>
47 1.11 thorpej
48 1.11 thorpej #include <prop/proplib.h>
49 1.1 scw
50 1.14 matt #include <powerpc/ibm4xx/cpu.h>
51 1.12 kiyohara #include <powerpc/ibm4xx/dcr4xx.h>
52 1.1 scw #include <powerpc/ibm4xx/dev/plbvar.h>
53 1.1 scw
54 1.1 scw
55 1.1 scw struct ecc_plb_softc {
56 1.13 matt device_t sc_dev;
57 1.13 matt uint64_t sc_ecc_tb;
58 1.13 matt uint64_t sc_ecc_iv; /* Interval */
59 1.13 matt uint32_t sc_ecc_cnt;
60 1.1 scw u_int sc_memsize;
61 1.1 scw int sc_irq;
62 1.1 scw };
63 1.1 scw
64 1.13 matt static int ecc_plbmatch(device_t, cfdata_t, void *);
65 1.13 matt static void ecc_plbattach(device_t, device_t, void *);
66 1.13 matt static void ecc_plb_deferred(device_t);
67 1.1 scw static int ecc_plb_intr(void *);
68 1.1 scw
69 1.13 matt CFATTACH_DECL_NEW(ecc_plb, sizeof(struct ecc_plb_softc),
70 1.6 thorpej ecc_plbmatch, ecc_plbattach, NULL, NULL);
71 1.1 scw
72 1.1 scw static int ecc_plb_found;
73 1.1 scw
74 1.1 scw static int
75 1.13 matt ecc_plbmatch(device_t parent, cfdata_t cf, void *aux)
76 1.1 scw {
77 1.1 scw struct plb_attach_args *paa = aux;
78 1.1 scw
79 1.2 thorpej if (strcmp(paa->plb_name, cf->cf_name) != 0)
80 1.1 scw return (0);
81 1.1 scw
82 1.1 scw if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
83 1.3 provos panic("ecc_plbmatch: wildcard IRQ not allowed");
84 1.1 scw
85 1.1 scw paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
86 1.1 scw
87 1.1 scw return (!ecc_plb_found);
88 1.1 scw }
89 1.1 scw
90 1.1 scw static void
91 1.13 matt ecc_plbattach(device_t parent, device_t self, void *aux)
92 1.1 scw {
93 1.13 matt struct ecc_plb_softc *sc = device_private(self);
94 1.1 scw struct plb_attach_args *paa = aux;
95 1.1 scw unsigned int processor_freq;
96 1.1 scw unsigned int memsiz;
97 1.11 thorpej prop_number_t pn;
98 1.1 scw
99 1.1 scw ecc_plb_found++;
100 1.1 scw
101 1.11 thorpej pn = prop_dictionary_get(board_properties, "processor-frequency");
102 1.11 thorpej KASSERT(pn != NULL);
103 1.11 thorpej processor_freq = (unsigned int) prop_number_integer_value(pn);
104 1.11 thorpej
105 1.11 thorpej pn = prop_dictionary_get(board_properties, "mem-size");
106 1.11 thorpej KASSERT(pn != NULL);
107 1.11 thorpej memsiz = (unsigned int) prop_number_integer_value(pn);
108 1.1 scw
109 1.13 matt aprint_normal(": ECC controller\n");
110 1.1 scw
111 1.13 matt sc->sc_dev = self;
112 1.1 scw sc->sc_ecc_tb = 0;
113 1.1 scw sc->sc_ecc_cnt = 0;
114 1.1 scw sc->sc_ecc_iv = processor_freq; /* Set interval */
115 1.1 scw sc->sc_memsize = memsiz;
116 1.1 scw sc->sc_irq = paa->plb_irq;
117 1.1 scw
118 1.1 scw /*
119 1.1 scw * Defer hooking the interrupt until all PLB devices have attached
120 1.1 scw * since the interrupt controller may well be one of those devices...
121 1.1 scw */
122 1.1 scw config_defer(self, ecc_plb_deferred);
123 1.1 scw }
124 1.1 scw
125 1.1 scw static void
126 1.13 matt ecc_plb_deferred(device_t self)
127 1.1 scw {
128 1.13 matt struct ecc_plb_softc *sc = device_private(self);
129 1.1 scw
130 1.13 matt intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, sc);
131 1.1 scw }
132 1.1 scw
133 1.1 scw /*
134 1.1 scw * ECC fault handler.
135 1.1 scw */
136 1.1 scw static int
137 1.1 scw ecc_plb_intr(void *arg)
138 1.1 scw {
139 1.1 scw struct ecc_plb_softc *sc = arg;
140 1.1 scw u_int32_t esr, ear;
141 1.15 martin int ue;
142 1.1 scw u_quad_t tb;
143 1.1 scw u_long tmp, msr, dat;
144 1.1 scw
145 1.1 scw /* This code needs to be improved to handle double-bit errors */
146 1.1 scw /* in some intelligent fashion. */
147 1.1 scw
148 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
149 1.1 scw esr = mfdcr(DCR_SDRAM0_CFGDATA);
150 1.1 scw
151 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
152 1.1 scw ear = mfdcr(DCR_SDRAM0_CFGDATA);
153 1.1 scw
154 1.1 scw /* Always clear the error to stop the intr ASAP. */
155 1.1 scw
156 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
157 1.1 scw mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
158 1.1 scw
159 1.1 scw if (esr == 0x00) {
160 1.1 scw /* No current error. Could happen due to intr. nesting */
161 1.1 scw return(1);
162 1.1 scw }
163 1.1 scw
164 1.1 scw /*
165 1.1 scw * Only report errors every once per second max. Do this using the TB,
166 1.1 scw * because the system time (via microtime) may be adjusted when the
167 1.1 scw * date is set and can't reliably be used to measure intervals.
168 1.1 scw */
169 1.1 scw
170 1.10 perry __asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
171 1.1 scw : "=r"(tb), "=r"(tmp));
172 1.1 scw sc->sc_ecc_cnt++;
173 1.1 scw
174 1.1 scw if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
175 1.1 scw return(1);
176 1.1 scw
177 1.1 scw ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
178 1.1 scw
179 1.1 scw printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
180 1.1 scw "BLCE=%d%d%d%d CBE=%d%d.\n",
181 1.1 scw sc->sc_ecc_cnt, esr, ear,
182 1.1 scw (ue) ? "Uncorrectable" : "Correctable",
183 1.1 scw ((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
184 1.1 scw ((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
185 1.1 scw ((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
186 1.1 scw ((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
187 1.1 scw ((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
188 1.1 scw ((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
189 1.1 scw ((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
190 1.1 scw ((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
191 1.1 scw ((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
192 1.1 scw ((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
193 1.1 scw
194 1.1 scw /* Should check for uncorrectable errors and panic... */
195 1.1 scw
196 1.1 scw if (sc->sc_ecc_cnt > 1000) {
197 1.1 scw printf("ECC: Too many errors, recycling entire "
198 1.1 scw "SDRAM (size = %d).\n", sc->sc_memsize);
199 1.1 scw
200 1.1 scw /*
201 1.1 scw * Can this code be changed to run without disabling data MMU
202 1.1 scw * and disabling intrs?
203 1.1 scw * Does kernel always map all of physical RAM VA=PA? If so,
204 1.1 scw * just loop over lowmem.
205 1.1 scw */
206 1.10 perry __asm volatile(
207 1.1 scw "mfmsr %0;"
208 1.1 scw "li %1, 0x00;"
209 1.1 scw "ori %1, %1, 0x8010;"
210 1.1 scw "andc %1, %0, %1;"
211 1.1 scw "mtmsr %1;"
212 1.1 scw "sync;isync;"
213 1.1 scw "li %1, 0x00;"
214 1.1 scw "1:"
215 1.1 scw "dcbt 0, %1;"
216 1.1 scw "sync;isync;"
217 1.1 scw "lwz %2, 0(%1);"
218 1.1 scw "stw %2, 0(%1);"
219 1.1 scw "sync;isync;"
220 1.1 scw "dcbf 0, %1;"
221 1.1 scw "sync;isync;"
222 1.1 scw "addi %1, %1, 0x20;"
223 1.1 scw "addic. %3, %3, -0x20;"
224 1.1 scw "bge 1b;"
225 1.1 scw "mtmsr %0;"
226 1.1 scw "sync;isync;"
227 1.1 scw : "=&r" (msr), "=&r" (tmp), "=&r" (dat)
228 1.1 scw : "r" (sc->sc_memsize) : "0" );
229 1.1 scw
230 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
231 1.1 scw esr = mfdcr(DCR_SDRAM0_CFGDATA);
232 1.1 scw
233 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
234 1.1 scw mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
235 1.1 scw
236 1.1 scw /*
237 1.1 scw * Correctable errors here are OK, mem should be clean now.
238 1.1 scw *
239 1.1 scw * Should check for uncorrectable errors and panic...
240 1.1 scw */
241 1.1 scw printf("ECC: Recycling complete, ESR=%x. "
242 1.1 scw "Checking for persistent errors.\n", esr);
243 1.1 scw
244 1.10 perry __asm volatile(
245 1.1 scw "mfmsr %0;"
246 1.1 scw "li %1, 0x00;"
247 1.1 scw "ori %1, %1, 0x8010;"
248 1.1 scw "andc %1, %0, %1;"
249 1.1 scw "mtmsr %1;"
250 1.1 scw "sync;isync;"
251 1.1 scw "li %1, 0x00;"
252 1.1 scw "1:"
253 1.1 scw "dcbt 0, %1;"
254 1.1 scw "sync;isync;"
255 1.1 scw "lwz %2, 0(%1);"
256 1.1 scw "stw %2, 0(%1);"
257 1.1 scw "sync;isync;"
258 1.1 scw "dcbf 0, %1;"
259 1.1 scw "sync;isync;"
260 1.1 scw "addi %1, %1, 0x20;"
261 1.1 scw "addic. %3, %3, -0x20;"
262 1.1 scw "bge 1b;"
263 1.1 scw "mtmsr %0;"
264 1.1 scw "sync;isync;"
265 1.1 scw : "=&r" (msr), "=&r" (tmp), "=&r" (dat)
266 1.1 scw : "r" (sc->sc_memsize) : "0" );
267 1.1 scw
268 1.1 scw mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
269 1.1 scw esr = mfdcr(DCR_SDRAM0_CFGDATA);
270 1.1 scw
271 1.1 scw /*
272 1.1 scw * If esr is non zero here, we're screwed.
273 1.1 scw * Should check this and panic.
274 1.1 scw */
275 1.1 scw printf("ECC: Persistent error check complete, "
276 1.1 scw "final ESR=%x.\n", esr);
277 1.1 scw }
278 1.1 scw
279 1.1 scw sc->sc_ecc_tb = tb;
280 1.1 scw sc->sc_ecc_cnt = 0;
281 1.1 scw
282 1.1 scw return(1);
283 1.1 scw }
284