Home | History | Annotate | Line # | Download | only in dev
ecc_plb.c revision 1.11
      1 /*	$NetBSD: ecc_plb.c,v 1.11 2006/05/05 18:04:42 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright 2001 Wasabi Systems, Inc.
      5  * All rights reserved.
      6  *
      7  * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *      This product includes software developed for the NetBSD Project by
     20  *      Wasabi Systems, Inc.
     21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     22  *    or promote products derived from this software without specific prior
     23  *    written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 #include <sys/cdefs.h>
     39 __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.11 2006/05/05 18:04:42 thorpej Exp $");
     40 
     41 #include "locators.h"
     42 
     43 #include <sys/param.h>
     44 #include <sys/systm.h>
     45 #include <sys/device.h>
     46 
     47 #include <prop/proplib.h>
     48 
     49 #include <machine/cpu.h>
     50 #include <powerpc/ibm4xx/dcr405gp.h>
     51 #include <powerpc/ibm4xx/dev/plbvar.h>
     52 
     53 
     54 struct ecc_plb_softc {
     55 	struct device sc_dev;
     56 	u_quad_t sc_ecc_tb;
     57 	u_quad_t sc_ecc_iv;	 /* Interval */
     58 	u_int32_t sc_ecc_cnt;
     59 	u_int sc_memsize;
     60 	int sc_irq;
     61 };
     62 
     63 static int	ecc_plbmatch(struct device *, struct cfdata *, void *);
     64 static void	ecc_plbattach(struct device *, struct device *, void *);
     65 static void	ecc_plb_deferred(struct device *);
     66 static int	ecc_plb_intr(void *);
     67 
     68 CFATTACH_DECL(ecc_plb, sizeof(struct ecc_plb_softc),
     69     ecc_plbmatch, ecc_plbattach, NULL, NULL);
     70 
     71 static int ecc_plb_found;
     72 
     73 static int
     74 ecc_plbmatch(struct device *parent, struct cfdata *cf, void *aux)
     75 {
     76 	struct plb_attach_args *paa = aux;
     77 
     78 	if (strcmp(paa->plb_name, cf->cf_name) != 0)
     79 		return (0);
     80 
     81 	if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
     82 		panic("ecc_plbmatch: wildcard IRQ not allowed");
     83 
     84 	paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
     85 
     86 	return (!ecc_plb_found);
     87 }
     88 
     89 static void
     90 ecc_plbattach(struct device *parent, struct device *self, void *aux)
     91 {
     92 	struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
     93 	struct plb_attach_args *paa = aux;
     94 	unsigned int processor_freq;
     95 	unsigned int memsiz;
     96 	prop_number_t pn;
     97 
     98 	ecc_plb_found++;
     99 
    100 	pn = prop_dictionary_get(board_properties, "processor-frequency");
    101 	KASSERT(pn != NULL);
    102 	processor_freq = (unsigned int) prop_number_integer_value(pn);
    103 
    104 	pn = prop_dictionary_get(board_properties, "mem-size");
    105 	KASSERT(pn != NULL);
    106 	memsiz = (unsigned int) prop_number_integer_value(pn);
    107 
    108 	printf(": ECC controller\n");
    109 
    110 	sc->sc_ecc_tb = 0;
    111 	sc->sc_ecc_cnt = 0;
    112 	sc->sc_ecc_iv = processor_freq; /* Set interval */
    113 	sc->sc_memsize = memsiz;
    114 	sc->sc_irq = paa->plb_irq;
    115 
    116 	/*
    117 	 * Defer hooking the interrupt until all PLB devices have attached
    118 	 * since the interrupt controller may well be one of those devices...
    119 	 */
    120 	config_defer(self, ecc_plb_deferred);
    121 }
    122 
    123 static void
    124 ecc_plb_deferred(struct device *self)
    125 {
    126 	struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
    127 
    128 	intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, NULL);
    129 }
    130 
    131 /*
    132  * ECC fault handler.
    133  */
    134 static int
    135 ecc_plb_intr(void *arg)
    136 {
    137 	struct ecc_plb_softc *sc = arg;
    138 	u_int32_t		esr, ear;
    139 	int			ce, ue;
    140 	u_quad_t		tb;
    141 	u_long			tmp, msr, dat;
    142 
    143 	/* This code needs to be improved to handle double-bit errors */
    144 	/* in some intelligent fashion. */
    145 
    146 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    147 	esr = mfdcr(DCR_SDRAM0_CFGDATA);
    148 
    149 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
    150 	ear = mfdcr(DCR_SDRAM0_CFGDATA);
    151 
    152 	/* Always clear the error to stop the intr ASAP. */
    153 
    154 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    155 	mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
    156 
    157 	if (esr == 0x00) {
    158 		/* No current error.  Could happen due to intr. nesting */
    159 		return(1);
    160 	}
    161 
    162 	/*
    163 	 * Only report errors every once per second max. Do this using the TB,
    164 	 * because the system time (via microtime) may be adjusted when the
    165 	 * date is set and can't reliably be used to measure intervals.
    166 	 */
    167 
    168 	__asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
    169 		: "=r"(tb), "=r"(tmp));
    170 	sc->sc_ecc_cnt++;
    171 
    172 	if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
    173 		return(1);
    174 
    175 	ce = (esr & SDRAM0_ECCESR_CE) != 0x00;
    176 	ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
    177 
    178 	printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
    179 		"BLCE=%d%d%d%d CBE=%d%d.\n",
    180 		sc->sc_ecc_cnt, esr, ear,
    181 		(ue) ? "Uncorrectable" : "Correctable",
    182 		((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
    183 		((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
    184 		((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
    185 		((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
    186 		((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
    187 		((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
    188 		((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
    189 		((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
    190 		((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
    191 		((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
    192 
    193 	/* Should check for uncorrectable errors and panic... */
    194 
    195 	if (sc->sc_ecc_cnt > 1000) {
    196 		printf("ECC: Too many errors, recycling entire "
    197 			"SDRAM (size = %d).\n", sc->sc_memsize);
    198 
    199 		/*
    200 		 * Can this code be changed to run without disabling data MMU
    201 		 * and disabling intrs?
    202 		 * Does kernel always map all of physical RAM VA=PA? If so,
    203 		 * just loop over lowmem.
    204 		 */
    205 		__asm volatile(
    206 			"mfmsr 	%0;"
    207 			"li	%1, 0x00;"
    208 			"ori	%1, %1, 0x8010;"
    209 			"andc	%1, %0, %1;"
    210 			"mtmsr	%1;"
    211 			"sync;isync;"
    212 			"li	%1, 0x00;"
    213 			"1:"
    214 			"dcbt	0, %1;"
    215 			"sync;isync;"
    216 			"lwz	%2, 0(%1);"
    217 			"stw	%2, 0(%1);"
    218 			"sync;isync;"
    219 			"dcbf	0, %1;"
    220 			"sync;isync;"
    221 			"addi	%1, %1, 0x20;"
    222 			"addic.	%3, %3, -0x20;"
    223 			"bge 	1b;"
    224 			"mtmsr %0;"
    225 			"sync;isync;"
    226 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
    227 		: "r" (sc->sc_memsize) : "0" );
    228 
    229 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    230 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
    231 
    232 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    233 		mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
    234 
    235 		/*
    236 		 * Correctable errors here are OK, mem should be clean now.
    237 		 *
    238 		 * Should check for uncorrectable errors and panic...
    239 		 */
    240 		printf("ECC: Recycling complete, ESR=%x. "
    241 			"Checking for persistent errors.\n", esr);
    242 
    243 		__asm volatile(
    244 			"mfmsr 	%0;"
    245 			"li	%1, 0x00;"
    246 			"ori	%1, %1, 0x8010;"
    247 			"andc	%1, %0, %1;"
    248 			"mtmsr	%1;"
    249 			"sync;isync;"
    250 			"li	%1, 0x00;"
    251 			"1:"
    252 			"dcbt	0, %1;"
    253 			"sync;isync;"
    254 			"lwz	%2, 0(%1);"
    255 			"stw	%2, 0(%1);"
    256 			"sync;isync;"
    257 			"dcbf	0, %1;"
    258 			"sync;isync;"
    259 			"addi	%1, %1, 0x20;"
    260 			"addic.	%3, %3, -0x20;"
    261 			"bge 	1b;"
    262 			"mtmsr %0;"
    263 			"sync;isync;"
    264 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
    265 		: "r" (sc->sc_memsize) : "0" );
    266 
    267 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    268 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
    269 
    270 		/*
    271 		 * If esr is non zero here, we're screwed.
    272 		 * Should check this and panic.
    273 		 */
    274 		printf("ECC: Persistent error check complete, "
    275 			"final ESR=%x.\n", esr);
    276 	}
    277 
    278 	sc->sc_ecc_tb = tb;
    279 	sc->sc_ecc_cnt = 0;
    280 
    281 	return(1);
    282 }
    283