Home | History | Annotate | Line # | Download | only in raidframe
rf_raid1.c revision 1.9
      1  1.9    oster /*	$NetBSD: rf_raid1.c,v 1.9 2002/07/13 20:14:34 oster Exp $	*/
      2  1.1    oster /*
      3  1.1    oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1    oster  * All rights reserved.
      5  1.1    oster  *
      6  1.1    oster  * Author: William V. Courtright II
      7  1.1    oster  *
      8  1.1    oster  * Permission to use, copy, modify and distribute this software and
      9  1.1    oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1    oster  * notice and this permission notice appear in all copies of the
     11  1.1    oster  * software, derivative works or modified versions, and any portions
     12  1.1    oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1    oster  *
     14  1.1    oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1    oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1    oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1    oster  *
     18  1.1    oster  * Carnegie Mellon requests users of this software to return to
     19  1.1    oster  *
     20  1.1    oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1    oster  *  School of Computer Science
     22  1.1    oster  *  Carnegie Mellon University
     23  1.1    oster  *  Pittsburgh PA 15213-3890
     24  1.1    oster  *
     25  1.1    oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1    oster  * rights to redistribute these changes.
     27  1.1    oster  */
     28  1.1    oster 
     29  1.1    oster /*****************************************************************************
     30  1.1    oster  *
     31  1.1    oster  * rf_raid1.c -- implements RAID Level 1
     32  1.1    oster  *
     33  1.1    oster  *****************************************************************************/
     34  1.8    lukem 
     35  1.8    lukem #include <sys/cdefs.h>
     36  1.9    oster __KERNEL_RCSID(0, "$NetBSD: rf_raid1.c,v 1.9 2002/07/13 20:14:34 oster Exp $");
     37  1.1    oster 
     38  1.1    oster #include "rf_raid.h"
     39  1.1    oster #include "rf_raid1.h"
     40  1.1    oster #include "rf_dag.h"
     41  1.1    oster #include "rf_dagffrd.h"
     42  1.1    oster #include "rf_dagffwr.h"
     43  1.1    oster #include "rf_dagdegrd.h"
     44  1.1    oster #include "rf_dagutils.h"
     45  1.1    oster #include "rf_dagfuncs.h"
     46  1.1    oster #include "rf_diskqueue.h"
     47  1.1    oster #include "rf_general.h"
     48  1.1    oster #include "rf_utils.h"
     49  1.1    oster #include "rf_parityscan.h"
     50  1.1    oster #include "rf_mcpair.h"
     51  1.1    oster #include "rf_layout.h"
     52  1.1    oster #include "rf_map.h"
     53  1.1    oster #include "rf_engine.h"
     54  1.1    oster #include "rf_reconbuffer.h"
     55  1.1    oster 
     56  1.1    oster typedef struct RF_Raid1ConfigInfo_s {
     57  1.3    oster 	RF_RowCol_t **stripeIdentifier;
     58  1.3    oster }       RF_Raid1ConfigInfo_t;
     59  1.1    oster /* start of day code specific to RAID level 1 */
     60  1.3    oster int
     61  1.3    oster rf_ConfigureRAID1(
     62  1.3    oster     RF_ShutdownList_t ** listp,
     63  1.3    oster     RF_Raid_t * raidPtr,
     64  1.3    oster     RF_Config_t * cfgPtr)
     65  1.1    oster {
     66  1.3    oster 	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
     67  1.3    oster 	RF_Raid1ConfigInfo_t *info;
     68  1.3    oster 	RF_RowCol_t i;
     69  1.3    oster 
     70  1.3    oster 	/* create a RAID level 1 configuration structure */
     71  1.3    oster 	RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList);
     72  1.3    oster 	if (info == NULL)
     73  1.3    oster 		return (ENOMEM);
     74  1.3    oster 	layoutPtr->layoutSpecificInfo = (void *) info;
     75  1.3    oster 
     76  1.3    oster 	/* ... and fill it in. */
     77  1.3    oster 	info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList);
     78  1.3    oster 	if (info->stripeIdentifier == NULL)
     79  1.3    oster 		return (ENOMEM);
     80  1.3    oster 	for (i = 0; i < (raidPtr->numCol / 2); i++) {
     81  1.3    oster 		info->stripeIdentifier[i][0] = (2 * i);
     82  1.3    oster 		info->stripeIdentifier[i][1] = (2 * i) + 1;
     83  1.3    oster 	}
     84  1.3    oster 
     85  1.3    oster 	RF_ASSERT(raidPtr->numRow == 1);
     86  1.3    oster 
     87  1.3    oster 	/* this implementation of RAID level 1 uses one row of numCol disks
     88  1.3    oster 	 * and allows multiple (numCol / 2) stripes per row.  A stripe
     89  1.3    oster 	 * consists of a single data unit and a single parity (mirror) unit.
     90  1.3    oster 	 * stripe id = raidAddr / stripeUnitSize */
     91  1.3    oster 	raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit;
     92  1.3    oster 	layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2);
     93  1.3    oster 	layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit;
     94  1.3    oster 	layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
     95  1.3    oster 	layoutPtr->numDataCol = 1;
     96  1.3    oster 	layoutPtr->numParityCol = 1;
     97  1.3    oster 	return (0);
     98  1.1    oster }
     99  1.1    oster 
    100  1.1    oster 
    101  1.1    oster /* returns the physical disk location of the primary copy in the mirror pair */
    102  1.3    oster void
    103  1.3    oster rf_MapSectorRAID1(
    104  1.3    oster     RF_Raid_t * raidPtr,
    105  1.3    oster     RF_RaidAddr_t raidSector,
    106  1.3    oster     RF_RowCol_t * row,
    107  1.3    oster     RF_RowCol_t * col,
    108  1.3    oster     RF_SectorNum_t * diskSector,
    109  1.3    oster     int remap)
    110  1.1    oster {
    111  1.3    oster 	RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    112  1.3    oster 	RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
    113  1.1    oster 
    114  1.3    oster 	*row = 0;
    115  1.3    oster 	*col = 2 * mirrorPair;
    116  1.3    oster 	*diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    117  1.1    oster }
    118  1.1    oster 
    119  1.1    oster 
    120  1.1    oster /* Map Parity
    121  1.1    oster  *
    122  1.1    oster  * returns the physical disk location of the secondary copy in the mirror
    123  1.1    oster  * pair
    124  1.1    oster  */
    125  1.3    oster void
    126  1.3    oster rf_MapParityRAID1(
    127  1.3    oster     RF_Raid_t * raidPtr,
    128  1.3    oster     RF_RaidAddr_t raidSector,
    129  1.3    oster     RF_RowCol_t * row,
    130  1.3    oster     RF_RowCol_t * col,
    131  1.3    oster     RF_SectorNum_t * diskSector,
    132  1.3    oster     int remap)
    133  1.1    oster {
    134  1.3    oster 	RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    135  1.3    oster 	RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
    136  1.1    oster 
    137  1.3    oster 	*row = 0;
    138  1.3    oster 	*col = (2 * mirrorPair) + 1;
    139  1.1    oster 
    140  1.3    oster 	*diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    141  1.1    oster }
    142  1.1    oster 
    143  1.1    oster 
    144  1.1    oster /* IdentifyStripeRAID1
    145  1.1    oster  *
    146  1.1    oster  * returns a list of disks for a given redundancy group
    147  1.1    oster  */
    148  1.3    oster void
    149  1.3    oster rf_IdentifyStripeRAID1(
    150  1.3    oster     RF_Raid_t * raidPtr,
    151  1.3    oster     RF_RaidAddr_t addr,
    152  1.3    oster     RF_RowCol_t ** diskids,
    153  1.3    oster     RF_RowCol_t * outRow)
    154  1.1    oster {
    155  1.3    oster 	RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
    156  1.3    oster 	RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
    157  1.3    oster 	RF_ASSERT(stripeID >= 0);
    158  1.3    oster 	RF_ASSERT(addr >= 0);
    159  1.3    oster 	*outRow = 0;
    160  1.3    oster 	*diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)];
    161  1.3    oster 	RF_ASSERT(*diskids);
    162  1.1    oster }
    163  1.1    oster 
    164  1.1    oster 
    165  1.1    oster /* MapSIDToPSIDRAID1
    166  1.1    oster  *
    167  1.1    oster  * maps a logical stripe to a stripe in the redundant array
    168  1.1    oster  */
    169  1.3    oster void
    170  1.3    oster rf_MapSIDToPSIDRAID1(
    171  1.3    oster     RF_RaidLayout_t * layoutPtr,
    172  1.3    oster     RF_StripeNum_t stripeID,
    173  1.3    oster     RF_StripeNum_t * psID,
    174  1.3    oster     RF_ReconUnitNum_t * which_ru)
    175  1.1    oster {
    176  1.3    oster 	*which_ru = 0;
    177  1.3    oster 	*psID = stripeID;
    178  1.1    oster }
    179  1.1    oster 
    180  1.1    oster 
    181  1.1    oster 
    182  1.1    oster /******************************************************************************
    183  1.1    oster  * select a graph to perform a single-stripe access
    184  1.1    oster  *
    185  1.1    oster  * Parameters:  raidPtr    - description of the physical array
    186  1.1    oster  *              type       - type of operation (read or write) requested
    187  1.1    oster  *              asmap      - logical & physical addresses for this access
    188  1.1    oster  *              createFunc - name of function to use to create the graph
    189  1.1    oster  *****************************************************************************/
    190  1.1    oster 
    191  1.3    oster void
    192  1.3    oster rf_RAID1DagSelect(
    193  1.3    oster     RF_Raid_t * raidPtr,
    194  1.3    oster     RF_IoType_t type,
    195  1.3    oster     RF_AccessStripeMap_t * asmap,
    196  1.3    oster     RF_VoidFuncPtr * createFunc)
    197  1.1    oster {
    198  1.3    oster 	RF_RowCol_t frow, fcol, or, oc;
    199  1.3    oster 	RF_PhysDiskAddr_t *failedPDA;
    200  1.5    oster 	int     prior_recon;
    201  1.3    oster 	RF_RowStatus_t rstat;
    202  1.3    oster 	RF_SectorNum_t oo;
    203  1.3    oster 
    204  1.3    oster 
    205  1.3    oster 	RF_ASSERT(RF_IO_IS_R_OR_W(type));
    206  1.3    oster 
    207  1.3    oster 	if (asmap->numDataFailed + asmap->numParityFailed > 1) {
    208  1.3    oster 		RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
    209  1.3    oster 		*createFunc = NULL;
    210  1.3    oster 		return;
    211  1.3    oster 	}
    212  1.3    oster 	if (asmap->numDataFailed + asmap->numParityFailed) {
    213  1.3    oster 		/*
    214  1.3    oster 	         * We've got a fault. Re-map to spare space, iff applicable.
    215  1.3    oster 	         * Shouldn't the arch-independent code do this for us?
    216  1.3    oster 	         * Anyway, it turns out if we don't do this here, then when
    217  1.3    oster 	         * we're reconstructing, writes go only to the surviving
    218  1.3    oster 	         * original disk, and aren't reflected on the reconstructed
    219  1.3    oster 	         * spare. Oops. --jimz
    220  1.3    oster 	         */
    221  1.3    oster 		failedPDA = asmap->failedPDAs[0];
    222  1.3    oster 		frow = failedPDA->row;
    223  1.3    oster 		fcol = failedPDA->col;
    224  1.3    oster 		rstat = raidPtr->status[frow];
    225  1.3    oster 		prior_recon = (rstat == rf_rs_reconfigured) || (
    226  1.3    oster 		    (rstat == rf_rs_reconstructing) ?
    227  1.3    oster 		    rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
    228  1.3    oster 		    );
    229  1.3    oster 		if (prior_recon) {
    230  1.3    oster 			or = frow;
    231  1.3    oster 			oc = fcol;
    232  1.3    oster 			oo = failedPDA->startSector;
    233  1.3    oster 			/*
    234  1.3    oster 		         * If we did distributed sparing, we'd monkey with that here.
    235  1.3    oster 		         * But we don't, so we'll
    236  1.3    oster 		         */
    237  1.3    oster 			failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
    238  1.3    oster 			failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
    239  1.3    oster 			/*
    240  1.3    oster 		         * Redirect other components, iff necessary. This looks
    241  1.3    oster 		         * pretty suspicious to me, but it's what the raid5
    242  1.3    oster 		         * DAG select does.
    243  1.3    oster 		         */
    244  1.3    oster 			if (asmap->parityInfo->next) {
    245  1.3    oster 				if (failedPDA == asmap->parityInfo) {
    246  1.3    oster 					failedPDA->next->row = failedPDA->row;
    247  1.3    oster 					failedPDA->next->col = failedPDA->col;
    248  1.3    oster 				} else {
    249  1.3    oster 					if (failedPDA == asmap->parityInfo->next) {
    250  1.3    oster 						asmap->parityInfo->row = failedPDA->row;
    251  1.3    oster 						asmap->parityInfo->col = failedPDA->col;
    252  1.3    oster 					}
    253  1.3    oster 				}
    254  1.3    oster 			}
    255  1.3    oster 			if (rf_dagDebug || rf_mapDebug) {
    256  1.5    oster 				printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
    257  1.5    oster 				       raidPtr->raidid, type, or, oc,
    258  1.5    oster 				       (long) oo, failedPDA->row,
    259  1.5    oster 				       failedPDA->col,
    260  1.5    oster 				       (long) failedPDA->startSector);
    261  1.3    oster 			}
    262  1.3    oster 			asmap->numDataFailed = asmap->numParityFailed = 0;
    263  1.3    oster 		}
    264  1.3    oster 	}
    265  1.3    oster 	if (type == RF_IO_TYPE_READ) {
    266  1.3    oster 		if (asmap->numDataFailed == 0)
    267  1.3    oster 			*createFunc = (RF_VoidFuncPtr) rf_CreateMirrorIdleReadDAG;
    268  1.3    oster 		else
    269  1.3    oster 			*createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG;
    270  1.3    oster 	} else {
    271  1.3    oster 		*createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
    272  1.3    oster 	}
    273  1.1    oster }
    274  1.1    oster 
    275  1.3    oster int
    276  1.3    oster rf_VerifyParityRAID1(
    277  1.3    oster     RF_Raid_t * raidPtr,
    278  1.3    oster     RF_RaidAddr_t raidAddr,
    279  1.3    oster     RF_PhysDiskAddr_t * parityPDA,
    280  1.3    oster     int correct_it,
    281  1.3    oster     RF_RaidAccessFlags_t flags)
    282  1.1    oster {
    283  1.5    oster 	int     nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs;
    284  1.3    oster 	RF_DagNode_t *blockNode, *unblockNode, *wrBlock;
    285  1.3    oster 	RF_DagHeader_t *rd_dag_h, *wr_dag_h;
    286  1.3    oster 	RF_AccessStripeMapHeader_t *asm_h;
    287  1.3    oster 	RF_AllocListElem_t *allocList;
    288  1.3    oster 	RF_AccTraceEntry_t tracerec;
    289  1.3    oster 	RF_ReconUnitNum_t which_ru;
    290  1.3    oster 	RF_RaidLayout_t *layoutPtr;
    291  1.3    oster 	RF_AccessStripeMap_t *aasm;
    292  1.3    oster 	RF_SectorCount_t nsector;
    293  1.3    oster 	RF_RaidAddr_t startAddr;
    294  1.3    oster 	char   *buf, *buf1, *buf2;
    295  1.3    oster 	RF_PhysDiskAddr_t *pda;
    296  1.3    oster 	RF_StripeNum_t psID;
    297  1.3    oster 	RF_MCPair_t *mcpair;
    298  1.3    oster 
    299  1.3    oster 	layoutPtr = &raidPtr->Layout;
    300  1.3    oster 	startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
    301  1.3    oster 	nsector = parityPDA->numSector;
    302  1.3    oster 	nbytes = rf_RaidAddressToByte(raidPtr, nsector);
    303  1.3    oster 	psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
    304  1.3    oster 
    305  1.3    oster 	asm_h = NULL;
    306  1.3    oster 	rd_dag_h = wr_dag_h = NULL;
    307  1.3    oster 	mcpair = NULL;
    308  1.3    oster 
    309  1.3    oster 	ret = RF_PARITY_COULD_NOT_VERIFY;
    310  1.3    oster 
    311  1.3    oster 	rf_MakeAllocList(allocList);
    312  1.3    oster 	if (allocList == NULL)
    313  1.3    oster 		return (RF_PARITY_COULD_NOT_VERIFY);
    314  1.3    oster 	mcpair = rf_AllocMCPair();
    315  1.3    oster 	if (mcpair == NULL)
    316  1.3    oster 		goto done;
    317  1.3    oster 	RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol);
    318  1.3    oster 	stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
    319  1.3    oster 	bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol);
    320  1.3    oster 	RF_MallocAndAdd(buf, bcount, (char *), allocList);
    321  1.3    oster 	if (buf == NULL)
    322  1.3    oster 		goto done;
    323  1.3    oster 	if (rf_verifyParityDebug) {
    324  1.5    oster 		printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n",
    325  1.5    oster 		       raidPtr->raidid, (long) buf, bcount, (long) buf,
    326  1.5    oster 		       (long) buf + bcount);
    327  1.3    oster 	}
    328  1.3    oster 	/*
    329  1.3    oster          * Generate a DAG which will read the entire stripe- then we can
    330  1.3    oster          * just compare data chunks versus "parity" chunks.
    331  1.3    oster          */
    332  1.3    oster 
    333  1.3    oster 	rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf,
    334  1.3    oster 	    rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags,
    335  1.3    oster 	    RF_IO_NORMAL_PRIORITY);
    336  1.3    oster 	if (rd_dag_h == NULL)
    337  1.3    oster 		goto done;
    338  1.3    oster 	blockNode = rd_dag_h->succedents[0];
    339  1.3    oster 	unblockNode = blockNode->succedents[0]->succedents[0];
    340  1.3    oster 
    341  1.3    oster 	/*
    342  1.3    oster          * Map the access to physical disk addresses (PDAs)- this will
    343  1.3    oster          * get us both a list of data addresses, and "parity" addresses
    344  1.3    oster          * (which are really mirror copies).
    345  1.3    oster          */
    346  1.3    oster 	asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe,
    347  1.3    oster 	    buf, RF_DONT_REMAP);
    348  1.3    oster 	aasm = asm_h->stripeMap;
    349  1.3    oster 
    350  1.3    oster 	buf1 = buf;
    351  1.3    oster 	/*
    352  1.3    oster          * Loop through the data blocks, setting up read nodes for each.
    353  1.3    oster          */
    354  1.3    oster 	for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) {
    355  1.3    oster 		RF_ASSERT(pda);
    356  1.3    oster 
    357  1.3    oster 		rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    358  1.3    oster 
    359  1.3    oster 		RF_ASSERT(pda->numSector != 0);
    360  1.3    oster 		if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
    361  1.3    oster 			/* cannot verify parity with dead disk */
    362  1.3    oster 			goto done;
    363  1.3    oster 		}
    364  1.3    oster 		pda->bufPtr = buf1;
    365  1.3    oster 		blockNode->succedents[i]->params[0].p = pda;
    366  1.3    oster 		blockNode->succedents[i]->params[1].p = buf1;
    367  1.3    oster 		blockNode->succedents[i]->params[2].v = psID;
    368  1.3    oster 		blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    369  1.3    oster 		buf1 += nbytes;
    370  1.3    oster 	}
    371  1.3    oster 	RF_ASSERT(pda == NULL);
    372  1.3    oster 	/*
    373  1.3    oster          * keep i, buf1 running
    374  1.3    oster          *
    375  1.3    oster          * Loop through parity blocks, setting up read nodes for each.
    376  1.3    oster          */
    377  1.3    oster 	for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++, pda = pda->next) {
    378  1.3    oster 		RF_ASSERT(pda);
    379  1.3    oster 		rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    380  1.3    oster 		RF_ASSERT(pda->numSector != 0);
    381  1.3    oster 		if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
    382  1.3    oster 			/* cannot verify parity with dead disk */
    383  1.3    oster 			goto done;
    384  1.3    oster 		}
    385  1.3    oster 		pda->bufPtr = buf1;
    386  1.3    oster 		blockNode->succedents[i]->params[0].p = pda;
    387  1.3    oster 		blockNode->succedents[i]->params[1].p = buf1;
    388  1.3    oster 		blockNode->succedents[i]->params[2].v = psID;
    389  1.3    oster 		blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    390  1.3    oster 		buf1 += nbytes;
    391  1.3    oster 	}
    392  1.3    oster 	RF_ASSERT(pda == NULL);
    393  1.3    oster 
    394  1.6  thorpej 	memset((char *) &tracerec, 0, sizeof(tracerec));
    395  1.3    oster 	rd_dag_h->tracerec = &tracerec;
    396  1.3    oster 
    397  1.9    oster #if 0
    398  1.3    oster 	if (rf_verifyParityDebug > 1) {
    399  1.5    oster 		printf("raid%d: RAID1 parity verify read dag:\n",
    400  1.5    oster 		       raidPtr->raidid);
    401  1.3    oster 		rf_PrintDAGList(rd_dag_h);
    402  1.3    oster 	}
    403  1.9    oster #endif
    404  1.3    oster 	RF_LOCK_MUTEX(mcpair->mutex);
    405  1.3    oster 	mcpair->flag = 0;
    406  1.3    oster 	rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    407  1.3    oster 	    (void *) mcpair);
    408  1.3    oster 	while (mcpair->flag == 0) {
    409  1.3    oster 		RF_WAIT_MCPAIR(mcpair);
    410  1.3    oster 	}
    411  1.3    oster 	RF_UNLOCK_MUTEX(mcpair->mutex);
    412  1.3    oster 
    413  1.3    oster 	if (rd_dag_h->status != rf_enable) {
    414  1.3    oster 		RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n");
    415  1.3    oster 		ret = RF_PARITY_COULD_NOT_VERIFY;
    416  1.3    oster 		goto done;
    417  1.3    oster 	}
    418  1.3    oster 	/*
    419  1.3    oster          * buf1 is the beginning of the data blocks chunk
    420  1.3    oster          * buf2 is the beginning of the parity blocks chunk
    421  1.3    oster          */
    422  1.3    oster 	buf1 = buf;
    423  1.3    oster 	buf2 = buf + (nbytes * layoutPtr->numDataCol);
    424  1.3    oster 	ret = RF_PARITY_OKAY;
    425  1.3    oster 	/*
    426  1.3    oster          * bbufs is "bad bufs"- an array whose entries are the data
    427  1.3    oster          * column numbers where we had miscompares. (That is, column 0
    428  1.3    oster          * and column 1 of the array are mirror copies, and are considered
    429  1.3    oster          * "data column 0" for this purpose).
    430  1.3    oster          */
    431  1.3    oster 	RF_MallocAndAdd(bbufs, layoutPtr->numParityCol * sizeof(int), (int *),
    432  1.3    oster 	    allocList);
    433  1.3    oster 	nbad = 0;
    434  1.3    oster 	/*
    435  1.3    oster          * Check data vs "parity" (mirror copy).
    436  1.3    oster          */
    437  1.3    oster 	for (i = 0; i < layoutPtr->numDataCol; i++) {
    438  1.3    oster 		if (rf_verifyParityDebug) {
    439  1.5    oster 			printf("raid%d: RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n",
    440  1.5    oster 			       raidPtr->raidid, nbytes, i, (long) buf1,
    441  1.5    oster 			       (long) buf2, (long) buf);
    442  1.3    oster 		}
    443  1.7  thorpej 		ret = memcmp(buf1, buf2, nbytes);
    444  1.3    oster 		if (ret) {
    445  1.3    oster 			if (rf_verifyParityDebug > 1) {
    446  1.3    oster 				for (j = 0; j < nbytes; j++) {
    447  1.3    oster 					if (buf1[j] != buf2[j])
    448  1.3    oster 						break;
    449  1.3    oster 				}
    450  1.3    oster 				printf("psid=%ld j=%d\n", (long) psID, j);
    451  1.3    oster 				printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0] & 0xff,
    452  1.3    oster 				    buf1[1] & 0xff, buf1[2] & 0xff, buf1[3] & 0xff, buf1[4] & 0xff);
    453  1.3    oster 				printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0] & 0xff,
    454  1.3    oster 				    buf2[1] & 0xff, buf2[2] & 0xff, buf2[3] & 0xff, buf2[4] & 0xff);
    455  1.3    oster 			}
    456  1.3    oster 			if (rf_verifyParityDebug) {
    457  1.5    oster 				printf("raid%d: RAID1: found bad parity, i=%d\n", raidPtr->raidid, i);
    458  1.3    oster 			}
    459  1.3    oster 			/*
    460  1.3    oster 		         * Parity is bad. Keep track of which columns were bad.
    461  1.3    oster 		         */
    462  1.3    oster 			if (bbufs)
    463  1.3    oster 				bbufs[nbad] = i;
    464  1.3    oster 			nbad++;
    465  1.3    oster 			ret = RF_PARITY_BAD;
    466  1.3    oster 		}
    467  1.3    oster 		buf1 += nbytes;
    468  1.3    oster 		buf2 += nbytes;
    469  1.3    oster 	}
    470  1.3    oster 
    471  1.3    oster 	if ((ret != RF_PARITY_OKAY) && correct_it) {
    472  1.3    oster 		ret = RF_PARITY_COULD_NOT_CORRECT;
    473  1.3    oster 		if (rf_verifyParityDebug) {
    474  1.5    oster 			printf("raid%d: RAID1 parity verify: parity not correct\n", raidPtr->raidid);
    475  1.3    oster 		}
    476  1.3    oster 		if (bbufs == NULL)
    477  1.3    oster 			goto done;
    478  1.3    oster 		/*
    479  1.3    oster 	         * Make a DAG with one write node for each bad unit. We'll simply
    480  1.3    oster 	         * write the contents of the data unit onto the parity unit for
    481  1.3    oster 	         * correction. (It's possible that the mirror copy was the correct
    482  1.3    oster 	         * copy, and that we're spooging good data by writing bad over it,
    483  1.3    oster 	         * but there's no way we can know that.
    484  1.3    oster 	         */
    485  1.3    oster 		wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf,
    486  1.3    oster 		    rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags,
    487  1.3    oster 		    RF_IO_NORMAL_PRIORITY);
    488  1.3    oster 		if (wr_dag_h == NULL)
    489  1.3    oster 			goto done;
    490  1.3    oster 		wrBlock = wr_dag_h->succedents[0];
    491  1.3    oster 		/*
    492  1.3    oster 	         * Fill in a write node for each bad compare.
    493  1.3    oster 	         */
    494  1.3    oster 		for (i = 0; i < nbad; i++) {
    495  1.3    oster 			j = i + layoutPtr->numDataCol;
    496  1.3    oster 			pda = blockNode->succedents[j]->params[0].p;
    497  1.3    oster 			pda->bufPtr = blockNode->succedents[i]->params[1].p;
    498  1.3    oster 			wrBlock->succedents[i]->params[0].p = pda;
    499  1.3    oster 			wrBlock->succedents[i]->params[1].p = pda->bufPtr;
    500  1.3    oster 			wrBlock->succedents[i]->params[2].v = psID;
    501  1.3    oster 			wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    502  1.3    oster 		}
    503  1.6  thorpej 		memset((char *) &tracerec, 0, sizeof(tracerec));
    504  1.3    oster 		wr_dag_h->tracerec = &tracerec;
    505  1.9    oster #if 0
    506  1.3    oster 		if (rf_verifyParityDebug > 1) {
    507  1.3    oster 			printf("Parity verify write dag:\n");
    508  1.3    oster 			rf_PrintDAGList(wr_dag_h);
    509  1.3    oster 		}
    510  1.9    oster #endif
    511  1.3    oster 		RF_LOCK_MUTEX(mcpair->mutex);
    512  1.3    oster 		mcpair->flag = 0;
    513  1.3    oster 		/* fire off the write DAG */
    514  1.3    oster 		rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    515  1.3    oster 		    (void *) mcpair);
    516  1.3    oster 		while (!mcpair->flag) {
    517  1.3    oster 			RF_WAIT_COND(mcpair->cond, mcpair->mutex);
    518  1.3    oster 		}
    519  1.3    oster 		RF_UNLOCK_MUTEX(mcpair->mutex);
    520  1.3    oster 		if (wr_dag_h->status != rf_enable) {
    521  1.3    oster 			RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n");
    522  1.3    oster 			goto done;
    523  1.3    oster 		}
    524  1.3    oster 		ret = RF_PARITY_CORRECTED;
    525  1.3    oster 	}
    526  1.1    oster done:
    527  1.3    oster 	/*
    528  1.3    oster          * All done. We might've gotten here without doing part of the function,
    529  1.3    oster          * so cleanup what we have to and return our running status.
    530  1.3    oster          */
    531  1.3    oster 	if (asm_h)
    532  1.3    oster 		rf_FreeAccessStripeMap(asm_h);
    533  1.3    oster 	if (rd_dag_h)
    534  1.3    oster 		rf_FreeDAG(rd_dag_h);
    535  1.3    oster 	if (wr_dag_h)
    536  1.3    oster 		rf_FreeDAG(wr_dag_h);
    537  1.3    oster 	if (mcpair)
    538  1.3    oster 		rf_FreeMCPair(mcpair);
    539  1.3    oster 	rf_FreeAllocList(allocList);
    540  1.3    oster 	if (rf_verifyParityDebug) {
    541  1.5    oster 		printf("raid%d: RAID1 parity verify, returning %d\n",
    542  1.5    oster 		       raidPtr->raidid, ret);
    543  1.3    oster 	}
    544  1.3    oster 	return (ret);
    545  1.1    oster }
    546  1.1    oster 
    547  1.3    oster int
    548  1.3    oster rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed)
    549  1.3    oster 	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
    550  1.3    oster 	int     keep_it;	/* whether we can keep this buffer or we have
    551  1.3    oster 				 * to return it */
    552  1.3    oster 	int     use_committed;	/* whether to use a committed or an available
    553  1.3    oster 				 * recon buffer */
    554  1.1    oster {
    555  1.3    oster 	RF_ReconParityStripeStatus_t *pssPtr;
    556  1.3    oster 	RF_ReconCtrl_t *reconCtrlPtr;
    557  1.3    oster 	RF_RaidLayout_t *layoutPtr;
    558  1.5    oster 	int     retcode, created;
    559  1.3    oster 	RF_CallbackDesc_t *cb, *p;
    560  1.3    oster 	RF_ReconBuffer_t *t;
    561  1.3    oster 	RF_Raid_t *raidPtr;
    562  1.3    oster 	caddr_t ta;
    563  1.3    oster 
    564  1.3    oster 	retcode = 0;
    565  1.3    oster 	created = 0;
    566  1.3    oster 
    567  1.3    oster 	raidPtr = rbuf->raidPtr;
    568  1.3    oster 	layoutPtr = &raidPtr->Layout;
    569  1.3    oster 	reconCtrlPtr = raidPtr->reconControl[rbuf->row];
    570  1.3    oster 
    571  1.3    oster 	RF_ASSERT(rbuf);
    572  1.3    oster 	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
    573  1.3    oster 
    574  1.3    oster 	if (rf_reconbufferDebug) {
    575  1.5    oster 		printf("raid%d: RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n",
    576  1.5    oster 		       raidPtr->raidid, rbuf->row, rbuf->col,
    577  1.5    oster 		       (long) rbuf->parityStripeID, rbuf->which_ru,
    578  1.5    oster 		       (long) rbuf->failedDiskSectorOffset);
    579  1.3    oster 	}
    580  1.3    oster 	if (rf_reconDebug) {
    581  1.3    oster 		printf("RAID1 reconbuffer submit psid %ld buf %lx\n",
    582  1.3    oster 		    (long) rbuf->parityStripeID, (long) rbuf->buffer);
    583  1.3    oster 		printf("RAID1 psid %ld   %02x %02x %02x %02x %02x\n",
    584  1.3    oster 		    (long) rbuf->parityStripeID,
    585  1.3    oster 		    rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3],
    586  1.3    oster 		    rbuf->buffer[4]);
    587  1.3    oster 	}
    588  1.3    oster 	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
    589  1.3    oster 
    590  1.3    oster 	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
    591  1.3    oster 
    592  1.3    oster 	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
    593  1.3    oster 	    rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
    594  1.3    oster 	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
    595  1.3    oster 				 * an rbuf for it */
    596  1.3    oster 
    597  1.3    oster 	/*
    598  1.3    oster          * Since this is simple mirroring, the first submission for a stripe is also
    599  1.3    oster          * treated as the last.
    600  1.3    oster          */
    601  1.3    oster 
    602  1.3    oster 	t = NULL;
    603  1.3    oster 	if (keep_it) {
    604  1.3    oster 		if (rf_reconbufferDebug) {
    605  1.5    oster 			printf("raid%d: RAID1 rbuf submission: keeping rbuf\n",
    606  1.5    oster 			       raidPtr->raidid);
    607  1.3    oster 		}
    608  1.3    oster 		t = rbuf;
    609  1.3    oster 	} else {
    610  1.3    oster 		if (use_committed) {
    611  1.3    oster 			if (rf_reconbufferDebug) {
    612  1.5    oster 				printf("raid%d: RAID1 rbuf submission: using committed rbuf\n", raidPtr->raidid);
    613  1.3    oster 			}
    614  1.3    oster 			t = reconCtrlPtr->committedRbufs;
    615  1.3    oster 			RF_ASSERT(t);
    616  1.3    oster 			reconCtrlPtr->committedRbufs = t->next;
    617  1.3    oster 			t->next = NULL;
    618  1.3    oster 		} else
    619  1.3    oster 			if (reconCtrlPtr->floatingRbufs) {
    620  1.3    oster 				if (rf_reconbufferDebug) {
    621  1.5    oster 					printf("raid%d: RAID1 rbuf submission: using floating rbuf\n", raidPtr->raidid);
    622  1.3    oster 				}
    623  1.3    oster 				t = reconCtrlPtr->floatingRbufs;
    624  1.3    oster 				reconCtrlPtr->floatingRbufs = t->next;
    625  1.3    oster 				t->next = NULL;
    626  1.3    oster 			}
    627  1.3    oster 	}
    628  1.3    oster 	if (t == NULL) {
    629  1.3    oster 		if (rf_reconbufferDebug) {
    630  1.5    oster 			printf("raid%d: RAID1 rbuf submission: waiting for rbuf\n", raidPtr->raidid);
    631  1.3    oster 		}
    632  1.3    oster 		RF_ASSERT((keep_it == 0) && (use_committed == 0));
    633  1.3    oster 		raidPtr->procsInBufWait++;
    634  1.3    oster 		if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1))
    635  1.3    oster 		    && (raidPtr->numFullReconBuffers == 0)) {
    636  1.3    oster 			/* ruh-ro */
    637  1.3    oster 			RF_ERRORMSG("Buffer wait deadlock\n");
    638  1.3    oster 			rf_PrintPSStatusTable(raidPtr, rbuf->row);
    639  1.3    oster 			RF_PANIC();
    640  1.3    oster 		}
    641  1.3    oster 		pssPtr->flags |= RF_PSS_BUFFERWAIT;
    642  1.3    oster 		cb = rf_AllocCallbackDesc();
    643  1.3    oster 		cb->row = rbuf->row;
    644  1.3    oster 		cb->col = rbuf->col;
    645  1.3    oster 		cb->callbackArg.v = rbuf->parityStripeID;
    646  1.3    oster 		cb->callbackArg2.v = rbuf->which_ru;
    647  1.3    oster 		cb->next = NULL;
    648  1.3    oster 		if (reconCtrlPtr->bufferWaitList == NULL) {
    649  1.3    oster 			/* we are the wait list- lucky us */
    650  1.3    oster 			reconCtrlPtr->bufferWaitList = cb;
    651  1.3    oster 		} else {
    652  1.3    oster 			/* append to wait list */
    653  1.3    oster 			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
    654  1.3    oster 			p->next = cb;
    655  1.3    oster 		}
    656  1.3    oster 		retcode = 1;
    657  1.3    oster 		goto out;
    658  1.3    oster 	}
    659  1.3    oster 	if (t != rbuf) {
    660  1.3    oster 		t->row = rbuf->row;
    661  1.3    oster 		t->col = reconCtrlPtr->fcol;
    662  1.3    oster 		t->parityStripeID = rbuf->parityStripeID;
    663  1.3    oster 		t->which_ru = rbuf->which_ru;
    664  1.3    oster 		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
    665  1.3    oster 		t->spRow = rbuf->spRow;
    666  1.3    oster 		t->spCol = rbuf->spCol;
    667  1.3    oster 		t->spOffset = rbuf->spOffset;
    668  1.3    oster 		/* Swap buffers. DANCE! */
    669  1.3    oster 		ta = t->buffer;
    670  1.3    oster 		t->buffer = rbuf->buffer;
    671  1.3    oster 		rbuf->buffer = ta;
    672  1.3    oster 	}
    673  1.3    oster 	/*
    674  1.3    oster          * Use the rbuf we've been given as the target.
    675  1.3    oster          */
    676  1.3    oster 	RF_ASSERT(pssPtr->rbuf == NULL);
    677  1.3    oster 	pssPtr->rbuf = t;
    678  1.3    oster 
    679  1.3    oster 	t->count = 1;
    680  1.3    oster 	/*
    681  1.3    oster          * Below, we use 1 for numDataCol (which is equal to the count in the
    682  1.3    oster          * previous line), so we'll always be done.
    683  1.3    oster          */
    684  1.3    oster 	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1);
    685  1.1    oster 
    686  1.1    oster out:
    687  1.3    oster 	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
    688  1.3    oster 	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
    689  1.3    oster 	if (rf_reconbufferDebug) {
    690  1.5    oster 		printf("raid%d: RAID1 rbuf submission: returning %d\n",
    691  1.5    oster 		       raidPtr->raidid, retcode);
    692  1.3    oster 	}
    693  1.3    oster 	return (retcode);
    694  1.1    oster }
    695