Home | History | Annotate | Line # | Download | only in dev
ecc_plb.c revision 1.10.12.1
      1  1.10.12.1     tron /*	$NetBSD: ecc_plb.c,v 1.10.12.1 2006/05/24 15:48:19 tron Exp $	*/
      2        1.1      scw 
      3        1.1      scw /*
      4        1.1      scw  * Copyright 2001 Wasabi Systems, Inc.
      5        1.1      scw  * All rights reserved.
      6        1.1      scw  *
      7        1.1      scw  * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
      8        1.1      scw  *
      9        1.1      scw  * Redistribution and use in source and binary forms, with or without
     10        1.1      scw  * modification, are permitted provided that the following conditions
     11        1.1      scw  * are met:
     12        1.1      scw  * 1. Redistributions of source code must retain the above copyright
     13        1.1      scw  *    notice, this list of conditions and the following disclaimer.
     14        1.1      scw  * 2. Redistributions in binary form must reproduce the above copyright
     15        1.1      scw  *    notice, this list of conditions and the following disclaimer in the
     16        1.1      scw  *    documentation and/or other materials provided with the distribution.
     17        1.1      scw  * 3. All advertising materials mentioning features or use of this software
     18        1.1      scw  *    must display the following acknowledgement:
     19        1.1      scw  *      This product includes software developed for the NetBSD Project by
     20        1.1      scw  *      Wasabi Systems, Inc.
     21        1.1      scw  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     22        1.1      scw  *    or promote products derived from this software without specific prior
     23        1.1      scw  *    written permission.
     24        1.1      scw  *
     25        1.1      scw  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     26        1.1      scw  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27        1.1      scw  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28        1.1      scw  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     29        1.1      scw  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30        1.1      scw  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31        1.1      scw  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32        1.1      scw  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33        1.1      scw  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34        1.1      scw  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35        1.1      scw  * POSSIBILITY OF SUCH DAMAGE.
     36        1.1      scw  */
     37        1.8    lukem 
     38        1.8    lukem #include <sys/cdefs.h>
     39  1.10.12.1     tron __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.10.12.1 2006/05/24 15:48:19 tron Exp $");
     40        1.1      scw 
     41        1.1      scw #include "locators.h"
     42        1.1      scw 
     43        1.1      scw #include <sys/param.h>
     44        1.1      scw #include <sys/systm.h>
     45        1.1      scw #include <sys/device.h>
     46  1.10.12.1     tron 
     47  1.10.12.1     tron #include <prop/proplib.h>
     48        1.1      scw 
     49        1.1      scw #include <machine/cpu.h>
     50        1.7  hannken #include <powerpc/ibm4xx/dcr405gp.h>
     51        1.1      scw #include <powerpc/ibm4xx/dev/plbvar.h>
     52        1.1      scw 
     53        1.1      scw 
     54        1.1      scw struct ecc_plb_softc {
     55        1.1      scw 	struct device sc_dev;
     56        1.1      scw 	u_quad_t sc_ecc_tb;
     57        1.1      scw 	u_quad_t sc_ecc_iv;	 /* Interval */
     58        1.1      scw 	u_int32_t sc_ecc_cnt;
     59        1.1      scw 	u_int sc_memsize;
     60        1.1      scw 	int sc_irq;
     61        1.1      scw };
     62        1.1      scw 
     63        1.1      scw static int	ecc_plbmatch(struct device *, struct cfdata *, void *);
     64        1.1      scw static void	ecc_plbattach(struct device *, struct device *, void *);
     65        1.1      scw static void	ecc_plb_deferred(struct device *);
     66        1.1      scw static int	ecc_plb_intr(void *);
     67        1.1      scw 
     68        1.5  thorpej CFATTACH_DECL(ecc_plb, sizeof(struct ecc_plb_softc),
     69        1.6  thorpej     ecc_plbmatch, ecc_plbattach, NULL, NULL);
     70        1.1      scw 
     71        1.1      scw static int ecc_plb_found;
     72        1.1      scw 
     73        1.1      scw static int
     74        1.1      scw ecc_plbmatch(struct device *parent, struct cfdata *cf, void *aux)
     75        1.1      scw {
     76        1.1      scw 	struct plb_attach_args *paa = aux;
     77        1.1      scw 
     78        1.2  thorpej 	if (strcmp(paa->plb_name, cf->cf_name) != 0)
     79        1.1      scw 		return (0);
     80        1.1      scw 
     81        1.1      scw 	if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
     82        1.3   provos 		panic("ecc_plbmatch: wildcard IRQ not allowed");
     83        1.1      scw 
     84        1.1      scw 	paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
     85        1.1      scw 
     86        1.1      scw 	return (!ecc_plb_found);
     87        1.1      scw }
     88        1.1      scw 
     89        1.1      scw static void
     90        1.1      scw ecc_plbattach(struct device *parent, struct device *self, void *aux)
     91        1.1      scw {
     92        1.1      scw 	struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
     93        1.1      scw 	struct plb_attach_args *paa = aux;
     94        1.1      scw 	unsigned int processor_freq;
     95        1.1      scw 	unsigned int memsiz;
     96  1.10.12.1     tron 	prop_number_t pn;
     97        1.1      scw 
     98        1.1      scw 	ecc_plb_found++;
     99        1.1      scw 
    100  1.10.12.1     tron 	pn = prop_dictionary_get(board_properties, "processor-frequency");
    101  1.10.12.1     tron 	KASSERT(pn != NULL);
    102  1.10.12.1     tron 	processor_freq = (unsigned int) prop_number_integer_value(pn);
    103  1.10.12.1     tron 
    104  1.10.12.1     tron 	pn = prop_dictionary_get(board_properties, "mem-size");
    105  1.10.12.1     tron 	KASSERT(pn != NULL);
    106  1.10.12.1     tron 	memsiz = (unsigned int) prop_number_integer_value(pn);
    107        1.1      scw 
    108        1.1      scw 	printf(": ECC controller\n");
    109        1.1      scw 
    110        1.1      scw 	sc->sc_ecc_tb = 0;
    111        1.1      scw 	sc->sc_ecc_cnt = 0;
    112        1.1      scw 	sc->sc_ecc_iv = processor_freq; /* Set interval */
    113        1.1      scw 	sc->sc_memsize = memsiz;
    114        1.1      scw 	sc->sc_irq = paa->plb_irq;
    115        1.1      scw 
    116        1.1      scw 	/*
    117        1.1      scw 	 * Defer hooking the interrupt until all PLB devices have attached
    118        1.1      scw 	 * since the interrupt controller may well be one of those devices...
    119        1.1      scw 	 */
    120        1.1      scw 	config_defer(self, ecc_plb_deferred);
    121        1.1      scw }
    122        1.1      scw 
    123        1.1      scw static void
    124        1.1      scw ecc_plb_deferred(struct device *self)
    125        1.1      scw {
    126        1.1      scw 	struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
    127        1.1      scw 
    128        1.1      scw 	intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, NULL);
    129        1.1      scw }
    130        1.1      scw 
    131        1.1      scw /*
    132        1.1      scw  * ECC fault handler.
    133        1.1      scw  */
    134        1.1      scw static int
    135        1.1      scw ecc_plb_intr(void *arg)
    136        1.1      scw {
    137        1.1      scw 	struct ecc_plb_softc *sc = arg;
    138        1.1      scw 	u_int32_t		esr, ear;
    139        1.1      scw 	int			ce, ue;
    140        1.1      scw 	u_quad_t		tb;
    141        1.1      scw 	u_long			tmp, msr, dat;
    142        1.1      scw 
    143        1.1      scw 	/* This code needs to be improved to handle double-bit errors */
    144        1.1      scw 	/* in some intelligent fashion. */
    145        1.1      scw 
    146        1.1      scw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    147        1.1      scw 	esr = mfdcr(DCR_SDRAM0_CFGDATA);
    148        1.1      scw 
    149        1.1      scw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
    150        1.1      scw 	ear = mfdcr(DCR_SDRAM0_CFGDATA);
    151        1.1      scw 
    152        1.1      scw 	/* Always clear the error to stop the intr ASAP. */
    153        1.1      scw 
    154        1.1      scw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    155        1.1      scw 	mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
    156        1.1      scw 
    157        1.1      scw 	if (esr == 0x00) {
    158        1.1      scw 		/* No current error.  Could happen due to intr. nesting */
    159        1.1      scw 		return(1);
    160        1.1      scw 	}
    161        1.1      scw 
    162        1.1      scw 	/*
    163        1.1      scw 	 * Only report errors every once per second max. Do this using the TB,
    164        1.1      scw 	 * because the system time (via microtime) may be adjusted when the
    165        1.1      scw 	 * date is set and can't reliably be used to measure intervals.
    166        1.1      scw 	 */
    167        1.1      scw 
    168       1.10    perry 	__asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
    169        1.1      scw 		: "=r"(tb), "=r"(tmp));
    170        1.1      scw 	sc->sc_ecc_cnt++;
    171        1.1      scw 
    172        1.1      scw 	if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
    173        1.1      scw 		return(1);
    174        1.1      scw 
    175        1.1      scw 	ce = (esr & SDRAM0_ECCESR_CE) != 0x00;
    176        1.1      scw 	ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
    177        1.1      scw 
    178        1.1      scw 	printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
    179        1.1      scw 		"BLCE=%d%d%d%d CBE=%d%d.\n",
    180        1.1      scw 		sc->sc_ecc_cnt, esr, ear,
    181        1.1      scw 		(ue) ? "Uncorrectable" : "Correctable",
    182        1.1      scw 		((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
    183        1.1      scw 		((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
    184        1.1      scw 		((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
    185        1.1      scw 		((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
    186        1.1      scw 		((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
    187        1.1      scw 		((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
    188        1.1      scw 		((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
    189        1.1      scw 		((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
    190        1.1      scw 		((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
    191        1.1      scw 		((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
    192        1.1      scw 
    193        1.1      scw 	/* Should check for uncorrectable errors and panic... */
    194        1.1      scw 
    195        1.1      scw 	if (sc->sc_ecc_cnt > 1000) {
    196        1.1      scw 		printf("ECC: Too many errors, recycling entire "
    197        1.1      scw 			"SDRAM (size = %d).\n", sc->sc_memsize);
    198        1.1      scw 
    199        1.1      scw 		/*
    200        1.1      scw 		 * Can this code be changed to run without disabling data MMU
    201        1.1      scw 		 * and disabling intrs?
    202        1.1      scw 		 * Does kernel always map all of physical RAM VA=PA? If so,
    203        1.1      scw 		 * just loop over lowmem.
    204        1.1      scw 		 */
    205       1.10    perry 		__asm volatile(
    206        1.1      scw 			"mfmsr 	%0;"
    207        1.1      scw 			"li	%1, 0x00;"
    208        1.1      scw 			"ori	%1, %1, 0x8010;"
    209        1.1      scw 			"andc	%1, %0, %1;"
    210        1.1      scw 			"mtmsr	%1;"
    211        1.1      scw 			"sync;isync;"
    212        1.1      scw 			"li	%1, 0x00;"
    213        1.1      scw 			"1:"
    214        1.1      scw 			"dcbt	0, %1;"
    215        1.1      scw 			"sync;isync;"
    216        1.1      scw 			"lwz	%2, 0(%1);"
    217        1.1      scw 			"stw	%2, 0(%1);"
    218        1.1      scw 			"sync;isync;"
    219        1.1      scw 			"dcbf	0, %1;"
    220        1.1      scw 			"sync;isync;"
    221        1.1      scw 			"addi	%1, %1, 0x20;"
    222        1.1      scw 			"addic.	%3, %3, -0x20;"
    223        1.1      scw 			"bge 	1b;"
    224        1.1      scw 			"mtmsr %0;"
    225        1.1      scw 			"sync;isync;"
    226        1.1      scw 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
    227        1.1      scw 		: "r" (sc->sc_memsize) : "0" );
    228        1.1      scw 
    229        1.1      scw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    230        1.1      scw 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
    231        1.1      scw 
    232        1.1      scw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    233        1.1      scw 		mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
    234        1.1      scw 
    235        1.1      scw 		/*
    236        1.1      scw 		 * Correctable errors here are OK, mem should be clean now.
    237        1.1      scw 		 *
    238        1.1      scw 		 * Should check for uncorrectable errors and panic...
    239        1.1      scw 		 */
    240        1.1      scw 		printf("ECC: Recycling complete, ESR=%x. "
    241        1.1      scw 			"Checking for persistent errors.\n", esr);
    242        1.1      scw 
    243       1.10    perry 		__asm volatile(
    244        1.1      scw 			"mfmsr 	%0;"
    245        1.1      scw 			"li	%1, 0x00;"
    246        1.1      scw 			"ori	%1, %1, 0x8010;"
    247        1.1      scw 			"andc	%1, %0, %1;"
    248        1.1      scw 			"mtmsr	%1;"
    249        1.1      scw 			"sync;isync;"
    250        1.1      scw 			"li	%1, 0x00;"
    251        1.1      scw 			"1:"
    252        1.1      scw 			"dcbt	0, %1;"
    253        1.1      scw 			"sync;isync;"
    254        1.1      scw 			"lwz	%2, 0(%1);"
    255        1.1      scw 			"stw	%2, 0(%1);"
    256        1.1      scw 			"sync;isync;"
    257        1.1      scw 			"dcbf	0, %1;"
    258        1.1      scw 			"sync;isync;"
    259        1.1      scw 			"addi	%1, %1, 0x20;"
    260        1.1      scw 			"addic.	%3, %3, -0x20;"
    261        1.1      scw 			"bge 	1b;"
    262        1.1      scw 			"mtmsr %0;"
    263        1.1      scw 			"sync;isync;"
    264        1.1      scw 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
    265        1.1      scw 		: "r" (sc->sc_memsize) : "0" );
    266        1.1      scw 
    267        1.1      scw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    268        1.1      scw 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
    269        1.1      scw 
    270        1.1      scw 		/*
    271        1.1      scw 		 * If esr is non zero here, we're screwed.
    272        1.1      scw 		 * Should check this and panic.
    273        1.1      scw 		 */
    274        1.1      scw 		printf("ECC: Persistent error check complete, "
    275        1.1      scw 			"final ESR=%x.\n", esr);
    276        1.1      scw 	}
    277        1.1      scw 
    278        1.1      scw 	sc->sc_ecc_tb = tb;
    279        1.1      scw 	sc->sc_ecc_cnt = 0;
    280        1.1      scw 
    281        1.1      scw 	return(1);
    282        1.1      scw }
    283