Home | History | Annotate | Line # | Download | only in dev
ecc_plb.c revision 1.1
      1  1.1  scw /*	$NetBSD: ecc_plb.c,v 1.1 2002/08/23 15:01:08 scw Exp $	*/
      2  1.1  scw 
      3  1.1  scw /*
      4  1.1  scw  * Copyright 2001 Wasabi Systems, Inc.
      5  1.1  scw  * All rights reserved.
      6  1.1  scw  *
      7  1.1  scw  * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
      8  1.1  scw  *
      9  1.1  scw  * Redistribution and use in source and binary forms, with or without
     10  1.1  scw  * modification, are permitted provided that the following conditions
     11  1.1  scw  * are met:
     12  1.1  scw  * 1. Redistributions of source code must retain the above copyright
     13  1.1  scw  *    notice, this list of conditions and the following disclaimer.
     14  1.1  scw  * 2. Redistributions in binary form must reproduce the above copyright
     15  1.1  scw  *    notice, this list of conditions and the following disclaimer in the
     16  1.1  scw  *    documentation and/or other materials provided with the distribution.
     17  1.1  scw  * 3. All advertising materials mentioning features or use of this software
     18  1.1  scw  *    must display the following acknowledgement:
     19  1.1  scw  *      This product includes software developed for the NetBSD Project by
     20  1.1  scw  *      Wasabi Systems, Inc.
     21  1.1  scw  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     22  1.1  scw  *    or promote products derived from this software without specific prior
     23  1.1  scw  *    written permission.
     24  1.1  scw  *
     25  1.1  scw  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     26  1.1  scw  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  1.1  scw  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  1.1  scw  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     29  1.1  scw  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  1.1  scw  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  1.1  scw  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  1.1  scw  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  1.1  scw  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  1.1  scw  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  1.1  scw  * POSSIBILITY OF SUCH DAMAGE.
     36  1.1  scw  */
     37  1.1  scw 
     38  1.1  scw #include "locators.h"
     39  1.1  scw 
     40  1.1  scw #include <sys/param.h>
     41  1.1  scw #include <sys/systm.h>
     42  1.1  scw #include <sys/device.h>
     43  1.1  scw #include <sys/properties.h>
     44  1.1  scw 
     45  1.1  scw #include <machine/dcr.h>
     46  1.1  scw #include <machine/cpu.h>
     47  1.1  scw #include <powerpc/ibm4xx/dev/plbvar.h>
     48  1.1  scw 
     49  1.1  scw 
     50  1.1  scw struct ecc_plb_softc {
     51  1.1  scw 	struct device sc_dev;
     52  1.1  scw 	u_quad_t sc_ecc_tb;
     53  1.1  scw 	u_quad_t sc_ecc_iv;	 /* Interval */
     54  1.1  scw 	u_int32_t sc_ecc_cnt;
     55  1.1  scw 	u_int sc_memsize;
     56  1.1  scw 	int sc_irq;
     57  1.1  scw };
     58  1.1  scw 
     59  1.1  scw static int	ecc_plbmatch(struct device *, struct cfdata *, void *);
     60  1.1  scw static void	ecc_plbattach(struct device *, struct device *, void *);
     61  1.1  scw static void	ecc_plb_deferred(struct device *);
     62  1.1  scw static int	ecc_plb_intr(void *);
     63  1.1  scw 
     64  1.1  scw struct cfattach ecc_plb_ca = {
     65  1.1  scw 	sizeof(struct ecc_plb_softc), ecc_plbmatch, ecc_plbattach
     66  1.1  scw };
     67  1.1  scw 
     68  1.1  scw static int ecc_plb_found;
     69  1.1  scw 
     70  1.1  scw static int
     71  1.1  scw ecc_plbmatch(struct device *parent, struct cfdata *cf, void *aux)
     72  1.1  scw {
     73  1.1  scw 	struct plb_attach_args *paa = aux;
     74  1.1  scw 
     75  1.1  scw 	if (strcmp(paa->plb_name, cf->cf_driver->cd_name) != 0)
     76  1.1  scw 		return (0);
     77  1.1  scw 
     78  1.1  scw 	if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
     79  1.1  scw 		panic("ecc_plbmatch: wildcard IRQ not allowed\n");
     80  1.1  scw 
     81  1.1  scw 	paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
     82  1.1  scw 
     83  1.1  scw 	return (!ecc_plb_found);
     84  1.1  scw }
     85  1.1  scw 
     86  1.1  scw static void
     87  1.1  scw ecc_plbattach(struct device *parent, struct device *self, void *aux)
     88  1.1  scw {
     89  1.1  scw 	struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
     90  1.1  scw 	struct plb_attach_args *paa = aux;
     91  1.1  scw 	unsigned int processor_freq;
     92  1.1  scw 	unsigned int memsiz;
     93  1.1  scw 
     94  1.1  scw 	ecc_plb_found++;
     95  1.1  scw 
     96  1.1  scw 	if (board_info_get("processor-frequency",
     97  1.1  scw 		&processor_freq, sizeof(processor_freq)) == -1)
     98  1.1  scw 		panic("no processor-frequency");
     99  1.1  scw 
    100  1.1  scw 	if (board_info_get("mem-size", &memsiz, sizeof(memsiz)) == -1)
    101  1.1  scw 		panic("no mem-size");
    102  1.1  scw 
    103  1.1  scw 	printf(": ECC controller\n");
    104  1.1  scw 
    105  1.1  scw 	sc->sc_ecc_tb = 0;
    106  1.1  scw 	sc->sc_ecc_cnt = 0;
    107  1.1  scw 	sc->sc_ecc_iv = processor_freq; /* Set interval */
    108  1.1  scw 	sc->sc_memsize = memsiz;
    109  1.1  scw 	sc->sc_irq = paa->plb_irq;
    110  1.1  scw 
    111  1.1  scw 	/*
    112  1.1  scw 	 * Defer hooking the interrupt until all PLB devices have attached
    113  1.1  scw 	 * since the interrupt controller may well be one of those devices...
    114  1.1  scw 	 */
    115  1.1  scw 	config_defer(self, ecc_plb_deferred);
    116  1.1  scw }
    117  1.1  scw 
    118  1.1  scw static void
    119  1.1  scw ecc_plb_deferred(struct device *self)
    120  1.1  scw {
    121  1.1  scw 	struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
    122  1.1  scw 
    123  1.1  scw 	intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, NULL);
    124  1.1  scw }
    125  1.1  scw 
    126  1.1  scw /*
    127  1.1  scw  * ECC fault handler.
    128  1.1  scw  */
    129  1.1  scw static int
    130  1.1  scw ecc_plb_intr(void *arg)
    131  1.1  scw {
    132  1.1  scw 	struct ecc_plb_softc *sc = arg;
    133  1.1  scw 	u_int32_t		esr, ear;
    134  1.1  scw 	int			ce, ue;
    135  1.1  scw 	u_quad_t		tb;
    136  1.1  scw 	u_long			tmp, msr, dat;
    137  1.1  scw 
    138  1.1  scw 	/* This code needs to be improved to handle double-bit errors */
    139  1.1  scw 	/* in some intelligent fashion. */
    140  1.1  scw 
    141  1.1  scw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    142  1.1  scw 	esr = mfdcr(DCR_SDRAM0_CFGDATA);
    143  1.1  scw 
    144  1.1  scw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
    145  1.1  scw 	ear = mfdcr(DCR_SDRAM0_CFGDATA);
    146  1.1  scw 
    147  1.1  scw 	/* Always clear the error to stop the intr ASAP. */
    148  1.1  scw 
    149  1.1  scw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    150  1.1  scw 	mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
    151  1.1  scw 
    152  1.1  scw 	if (esr == 0x00) {
    153  1.1  scw 		/* No current error.  Could happen due to intr. nesting */
    154  1.1  scw 		return(1);
    155  1.1  scw 	}
    156  1.1  scw 
    157  1.1  scw 	/*
    158  1.1  scw 	 * Only report errors every once per second max. Do this using the TB,
    159  1.1  scw 	 * because the system time (via microtime) may be adjusted when the
    160  1.1  scw 	 * date is set and can't reliably be used to measure intervals.
    161  1.1  scw 	 */
    162  1.1  scw 
    163  1.1  scw 	asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
    164  1.1  scw 		: "=r"(tb), "=r"(tmp));
    165  1.1  scw 	sc->sc_ecc_cnt++;
    166  1.1  scw 
    167  1.1  scw 	if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
    168  1.1  scw 		return(1);
    169  1.1  scw 
    170  1.1  scw 	ce = (esr & SDRAM0_ECCESR_CE) != 0x00;
    171  1.1  scw 	ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
    172  1.1  scw 
    173  1.1  scw 	printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
    174  1.1  scw 		"BLCE=%d%d%d%d CBE=%d%d.\n",
    175  1.1  scw 		sc->sc_ecc_cnt, esr, ear,
    176  1.1  scw 		(ue) ? "Uncorrectable" : "Correctable",
    177  1.1  scw 		((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
    178  1.1  scw 		((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
    179  1.1  scw 		((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
    180  1.1  scw 		((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
    181  1.1  scw 		((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
    182  1.1  scw 		((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
    183  1.1  scw 		((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
    184  1.1  scw 		((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
    185  1.1  scw 		((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
    186  1.1  scw 		((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
    187  1.1  scw 
    188  1.1  scw 	/* Should check for uncorrectable errors and panic... */
    189  1.1  scw 
    190  1.1  scw 	if (sc->sc_ecc_cnt > 1000) {
    191  1.1  scw 		printf("ECC: Too many errors, recycling entire "
    192  1.1  scw 			"SDRAM (size = %d).\n", sc->sc_memsize);
    193  1.1  scw 
    194  1.1  scw 		/*
    195  1.1  scw 		 * Can this code be changed to run without disabling data MMU
    196  1.1  scw 		 * and disabling intrs?
    197  1.1  scw 		 * Does kernel always map all of physical RAM VA=PA? If so,
    198  1.1  scw 		 * just loop over lowmem.
    199  1.1  scw 		 */
    200  1.1  scw 		asm volatile(
    201  1.1  scw 			"mfmsr 	%0;"
    202  1.1  scw 			"li	%1, 0x00;"
    203  1.1  scw 			"ori	%1, %1, 0x8010;"
    204  1.1  scw 			"andc	%1, %0, %1;"
    205  1.1  scw 			"mtmsr	%1;"
    206  1.1  scw 			"sync;isync;"
    207  1.1  scw 			"li	%1, 0x00;"
    208  1.1  scw 			"1:"
    209  1.1  scw 			"dcbt	0, %1;"
    210  1.1  scw 			"sync;isync;"
    211  1.1  scw 			"lwz	%2, 0(%1);"
    212  1.1  scw 			"stw	%2, 0(%1);"
    213  1.1  scw 			"sync;isync;"
    214  1.1  scw 			"dcbf	0, %1;"
    215  1.1  scw 			"sync;isync;"
    216  1.1  scw 			"addi	%1, %1, 0x20;"
    217  1.1  scw 			"addic.	%3, %3, -0x20;"
    218  1.1  scw 			"bge 	1b;"
    219  1.1  scw 			"mtmsr %0;"
    220  1.1  scw 			"sync;isync;"
    221  1.1  scw 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
    222  1.1  scw 		: "r" (sc->sc_memsize) : "0" );
    223  1.1  scw 
    224  1.1  scw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    225  1.1  scw 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
    226  1.1  scw 
    227  1.1  scw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    228  1.1  scw 		mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
    229  1.1  scw 
    230  1.1  scw 		/*
    231  1.1  scw 		 * Correctable errors here are OK, mem should be clean now.
    232  1.1  scw 		 *
    233  1.1  scw 		 * Should check for uncorrectable errors and panic...
    234  1.1  scw 		 */
    235  1.1  scw 		printf("ECC: Recycling complete, ESR=%x. "
    236  1.1  scw 			"Checking for persistent errors.\n", esr);
    237  1.1  scw 
    238  1.1  scw 		asm volatile(
    239  1.1  scw 			"mfmsr 	%0;"
    240  1.1  scw 			"li	%1, 0x00;"
    241  1.1  scw 			"ori	%1, %1, 0x8010;"
    242  1.1  scw 			"andc	%1, %0, %1;"
    243  1.1  scw 			"mtmsr	%1;"
    244  1.1  scw 			"sync;isync;"
    245  1.1  scw 			"li	%1, 0x00;"
    246  1.1  scw 			"1:"
    247  1.1  scw 			"dcbt	0, %1;"
    248  1.1  scw 			"sync;isync;"
    249  1.1  scw 			"lwz	%2, 0(%1);"
    250  1.1  scw 			"stw	%2, 0(%1);"
    251  1.1  scw 			"sync;isync;"
    252  1.1  scw 			"dcbf	0, %1;"
    253  1.1  scw 			"sync;isync;"
    254  1.1  scw 			"addi	%1, %1, 0x20;"
    255  1.1  scw 			"addic.	%3, %3, -0x20;"
    256  1.1  scw 			"bge 	1b;"
    257  1.1  scw 			"mtmsr %0;"
    258  1.1  scw 			"sync;isync;"
    259  1.1  scw 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
    260  1.1  scw 		: "r" (sc->sc_memsize) : "0" );
    261  1.1  scw 
    262  1.1  scw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
    263  1.1  scw 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
    264  1.1  scw 
    265  1.1  scw 		/*
    266  1.1  scw 		 * If esr is non zero here, we're screwed.
    267  1.1  scw 		 * Should check this and panic.
    268  1.1  scw 		 */
    269  1.1  scw 		printf("ECC: Persistent error check complete, "
    270  1.1  scw 			"final ESR=%x.\n", esr);
    271  1.1  scw 	}
    272  1.1  scw 
    273  1.1  scw 	sc->sc_ecc_tb = tb;
    274  1.1  scw 	sc->sc_ecc_cnt = 0;
    275  1.1  scw 
    276  1.1  scw 	return(1);
    277  1.1  scw }
    278