Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.3
      1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.3 1999/08/13 03:41:56 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: ChangMing Wu
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID-EVENODD  architecture.
     31  */
     32 
     33 #include "rf_types.h"
     34 #include "rf_raid.h"
     35 #include "rf_dag.h"
     36 #include "rf_dagffrd.h"
     37 #include "rf_dagffwr.h"
     38 #include "rf_dagdegrd.h"
     39 #include "rf_dagdegwr.h"
     40 #include "rf_dagutils.h"
     41 #include "rf_dagfuncs.h"
     42 #include "rf_threadid.h"
     43 #include "rf_etimer.h"
     44 #include "rf_general.h"
     45 #include "rf_configure.h"
     46 #include "rf_parityscan.h"
     47 #include "rf_evenodd.h"
     48 #include "rf_evenodd_dagfuncs.h"
     49 
     50 /* These redundant functions are for small write */
     51 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     52 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     53 /* These redundant functions are for degraded read */
     54 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     55 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
     56 /**********************************************************************************************
     57  *   the following encoding node functions is used in  EO_000_CreateLargeWriteDAG
     58  **********************************************************************************************/
     59 int
     60 rf_RegularPEFunc(node)
     61 	RF_DagNode_t *node;
     62 {
     63 	rf_RegularESubroutine(node, node->results[1]);
     64 	rf_RegularXorFunc(node);/* does the wakeup here! */
     65 #if 1
     66 	return (0);		/* XXX This was missing... GO */
     67 #endif
     68 }
     69 
     70 
     71 /************************************************************************************************
     72  *  For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to
     73  *  be used. The previous case is when write access at least sectors of full stripe unit.
     74  *  The later function is used when the write access two stripe units but with total sectors
     75  *  less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected
     76  *  areas in their stripe unit and  parity write and 'E' write are both devided into two distinct
     77  *  writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5
     78  ************************************************************************************************/
     79 
     80 /* Algorithm:
     81      1. Store the difference of old data and new data in the Rod buffer.
     82      2. then encode this buffer into the buffer which already have old 'E' information inside it,
     83 	the result can be shown to be the new 'E' information.
     84      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
     85    Here we have another alternative: to allocate a temporary buffer for storing the difference of
     86    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
     87    take the same speed as the previous, and need more memory.
     88 */
     89 int
     90 rf_RegularONEFunc(node)
     91 	RF_DagNode_t *node;
     92 {
     93 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
     94 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
     95 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
     96 								 * where you can find
     97 								 * e-pda */
     98 	int     i, k, retcode = 0;
     99 	int     suoffset, length;
    100 	RF_RowCol_t scol;
    101 	char   *srcbuf, *destbuf;
    102 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    103 	RF_Etimer_t timer;
    104 	RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    105 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);	/* generally zero  */
    106 
    107 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    108 	RF_ASSERT(ESUOffset == 0);
    109 
    110 	RF_ETIMER_START(timer);
    111 
    112 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    113 	 * new data is stored in Rod buffer */
    114 	for (k = 0; k < EpdaIndex; k += 2) {
    115 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    116 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    117 	}
    118 	/* Start to encoding the buffer storing the difference of old data and
    119 	 * new data into 'E' buffer  */
    120 	for (i = 0; i < EpdaIndex; i += 2)
    121 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    122 									 * of E */
    123 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    124 			srcbuf = (char *) node->params[i + 1].p;
    125 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    126 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    127 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    128 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    129 		}
    130 	/* Recover the original old data to be used by parity encoding
    131 	 * function in XorNode */
    132 	for (k = 0; k < EpdaIndex; k += 2) {
    133 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    134 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    135 	}
    136 	RF_ETIMER_STOP(timer);
    137 	RF_ETIMER_EVAL(timer);
    138 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    139 	rf_GenericWakeupFunc(node, 0);
    140 #if 1
    141 	return (0);		/* XXX this was missing.. GO */
    142 #endif
    143 }
    144 
    145 int
    146 rf_SimpleONEFunc(node)
    147 	RF_DagNode_t *node;
    148 {
    149 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    150 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    151 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    152 	int     retcode = 0;
    153 	char   *srcbuf, *destbuf;
    154 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    155 	int     length;
    156 	RF_RowCol_t scol;
    157 	RF_Etimer_t timer;
    158 
    159 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    160 	if (node->dagHdr->status == rf_enable) {
    161 		RF_ETIMER_START(timer);
    162 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    163 														 * writeDataNodes */
    164 		/* bxor to buffer of readDataNodes */
    165 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    166 		/* find out the corresponding colume in encoding matrix for
    167 		 * write colume to be encoded into redundant disk 'E' */
    168 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    169 		srcbuf = node->params[1].p;
    170 		destbuf = node->params[3].p;
    171 		/* Start encoding process */
    172 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    173 		rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    174 		RF_ETIMER_STOP(timer);
    175 		RF_ETIMER_EVAL(timer);
    176 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    177 
    178 	}
    179 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    180 							 * explicitly since no
    181 							 * I/O in this node */
    182 }
    183 
    184 
    185 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    186 void
    187 rf_RegularESubroutine(node, ebuf)
    188 	RF_DagNode_t *node;
    189 	char   *ebuf;
    190 {
    191 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    192 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    193 	RF_PhysDiskAddr_t *pda;
    194 	int     i, suoffset;
    195 	RF_RowCol_t scol;
    196 	char   *srcbuf, *destbuf;
    197 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    198 	RF_Etimer_t timer;
    199 
    200 	RF_ETIMER_START(timer);
    201 	for (i = 0; i < node->numParams - 2; i += 2) {
    202 		RF_ASSERT(node->params[i + 1].p != ebuf);
    203 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    204 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    205 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    206 		srcbuf = (char *) node->params[i + 1].p;
    207 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    208 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    209 	}
    210 	RF_ETIMER_STOP(timer);
    211 	RF_ETIMER_EVAL(timer);
    212 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    213 }
    214 
    215 
    216 /*******************************************************************************************
    217  *			 Used in  EO_001_CreateLargeWriteDAG
    218  ******************************************************************************************/
    219 int
    220 rf_RegularEFunc(node)
    221 	RF_DagNode_t *node;
    222 {
    223 	rf_RegularESubroutine(node, node->results[0]);
    224 	rf_GenericWakeupFunc(node, 0);
    225 #if 1
    226 	return (0);		/* XXX this was missing?.. GO */
    227 #endif
    228 }
    229 /*******************************************************************************************
    230  * This degraded function allow only two case:
    231  *  1. when write access the full failed stripe unit, then the access can be more than
    232  *     one tripe units.
    233  *  2. when write access only part of the failed SU, we assume accesses of more than
    234  *     one stripe unit is not allowed so that the write can be dealt with like a
    235  *     large write.
    236  *  The following function is based on these assumptions. So except in the second case,
    237  *  it looks the same as a large write encodeing function. But this is not exactly the
    238  *  normal way for doing a degraded write, since raidframe have to break cases of access
    239  *  other than the above two into smaller accesses. We may have to change
    240  *  DegrESubroutin in the future.
    241  *******************************************************************************************/
    242 void
    243 rf_DegrESubroutine(node, ebuf)
    244 	RF_DagNode_t *node;
    245 	char   *ebuf;
    246 {
    247 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    248 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    249 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    250 	RF_PhysDiskAddr_t *pda;
    251 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    252 	RF_RowCol_t scol;
    253 	char   *srcbuf, *destbuf;
    254 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    255 	RF_Etimer_t timer;
    256 
    257 	RF_ETIMER_START(timer);
    258 	for (i = 0; i < node->numParams - 2; i += 2) {
    259 		RF_ASSERT(node->params[i + 1].p != ebuf);
    260 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    261 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    262 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    263 		srcbuf = (char *) node->params[i + 1].p;
    264 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    265 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    266 	}
    267 
    268 	RF_ETIMER_STOP(timer);
    269 	RF_ETIMER_EVAL(timer);
    270 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    271 }
    272 
    273 
    274 /**************************************************************************************
    275  * This function is used in case where one data disk failed and both redundant disks
    276  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    277  * failed in the stripe but not accessed at this time, then we should, instead, use
    278  * the rf_EOWriteDoubleRecoveryFunc().
    279  **************************************************************************************/
    280 int
    281 rf_Degraded_100_EOFunc(node)
    282 	RF_DagNode_t *node;
    283 {
    284 	rf_DegrESubroutine(node, node->results[1]);
    285 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    286 #if 1
    287 	return (0);		/* XXX this was missing... SHould these be
    288 				 * void functions??? GO */
    289 #endif
    290 }
    291 /**************************************************************************************
    292  * This function is to encode one sector in one of the data disks to the E disk.
    293  * However, in evenodd this function can also be used as decoding function to recover
    294  * data from dead disk in the case of parity failure and a single data failure.
    295  **************************************************************************************/
    296 void
    297 rf_e_EncOneSect(
    298     RF_RowCol_t srcLogicCol,
    299     char *srcSecbuf,
    300     RF_RowCol_t destLogicCol,
    301     char *destSecbuf,
    302     int bytesPerSector)
    303 {
    304 	int     S_index;	/* index of the EU in the src col which need
    305 				 * be Xored into all EUs in a dest sector */
    306 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    307 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    308 					 * the destination colume of encoding
    309 					 * matrix */
    310 	        indexInSrc;	/* row index of an encoding unit in the source
    311 				 * colume used for recovery */
    312 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    313 
    314 #if RF_EO_MATRIX_DIM > 17
    315 	int     shortsPerEU = bytesPerEU / sizeof(short);
    316 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    317 	register short temp1;
    318 #elif RF_EO_MATRIX_DIM == 17
    319 	int     longsPerEU = bytesPerEU / sizeof(long);
    320 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    321 	register long temp1;
    322 #endif
    323 
    324 #if RF_EO_MATRIX_DIM > 17
    325 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    326 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    327 #elif RF_EO_MATRIX_DIM == 17
    328 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    329 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    330 #endif
    331 
    332 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    333 #if RF_EO_MATRIX_DIM > 17
    334 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    335 #elif RF_EO_MATRIX_DIM == 17
    336 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    337 #endif
    338 
    339 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    340 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    341 
    342 #if RF_EO_MATRIX_DIM > 17
    343 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    344 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    345 		for (j = 0; j < shortsPerEU; j++) {
    346 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    347 			/* note: S_index won't be at the end row for any src
    348 			 * col! */
    349 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    350 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    351 			/* if indexInSrc is at the end row, ie.
    352 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    353 			else
    354 				destShortBuf[j] = temp1;
    355 		}
    356 
    357 #elif RF_EO_MATRIX_DIM == 17
    358 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    359 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    360 		for (j = 0; j < longsPerEU; j++) {
    361 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    362 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    363 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    364 			else
    365 				destLongBuf[j] = temp1;
    366 		}
    367 #endif
    368 	}
    369 }
    370 
    371 void
    372 rf_e_encToBuf(
    373     RF_Raid_t * raidPtr,
    374     RF_RowCol_t srcLogicCol,
    375     char *srcbuf,
    376     RF_RowCol_t destLogicCol,
    377     char *destbuf,
    378     int numSector)
    379 {
    380 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    381 
    382 	for (i = 0; i < numSector; i++) {
    383 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    384 		srcbuf += bytesPerSector;
    385 		destbuf += bytesPerSector;
    386 	}
    387 }
    388 /**************************************************************************************
    389  * when parity die and one data die, We use second redundant information, 'E',
    390  * to recover the data in dead disk. This function is used in the recovery node of
    391  * for EO_110_CreateReadDAG
    392  **************************************************************************************/
    393 int
    394 rf_RecoveryEFunc(node)
    395 	RF_DagNode_t *node;
    396 {
    397 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    398 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    399 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    400 	RF_RowCol_t scol,	/* source logical column */
    401 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    402 									 * failed SU */
    403 	int     i;
    404 	RF_PhysDiskAddr_t *pda;
    405 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    406 	char   *srcbuf, *destbuf;
    407 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    408 	RF_Etimer_t timer;
    409 
    410 	bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    411 	if (node->dagHdr->status == rf_enable) {
    412 		RF_ETIMER_START(timer);
    413 		for (i = 0; i < node->numParams - 2; i += 2)
    414 			if (node->params[i + 1].p != node->results[0]) {
    415 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    416 				if (i == node->numParams - 4)
    417 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    418 									 * redundant E */
    419 				else
    420 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    421 				srcbuf = (char *) node->params[i + 1].p;
    422 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    423 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    424 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    425 			}
    426 		RF_ETIMER_STOP(timer);
    427 		RF_ETIMER_EVAL(timer);
    428 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    429 	}
    430 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    431 }
    432 /**************************************************************************************
    433  * This function is used in the case where one data and the parity have filed.
    434  * (in EO_110_CreateWriteDAG )
    435  **************************************************************************************/
    436 int
    437 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    438 {
    439 	rf_DegrESubroutine(node, node->results[0]);
    440 	rf_GenericWakeupFunc(node, 0);
    441 #if 1
    442 	return (0);		/* XXX Yet another one!! GO */
    443 #endif
    444 }
    445 
    446 
    447 
    448 /**************************************************************************************
    449  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    450  **************************************************************************************/
    451 
    452 void
    453 rf_doubleEOdecode(
    454     RF_Raid_t * raidPtr,
    455     char **rrdbuf,
    456     char **dest,
    457     RF_RowCol_t * fcol,
    458     char *pbuf,
    459     char *ebuf)
    460 {
    461 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    462 	int     i, j, k, f1, f2, row;
    463 	int     rrdrow, erow, count = 0;
    464 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    465 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    466 #if 0
    467 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    468 #endif
    469 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    470 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    471 	int     numDataCol = layoutPtr->numDataCol;
    472 #if RF_EO_MATRIX_DIM > 17
    473 	int     shortsPerEU = bytesPerEU / sizeof(short);
    474 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    475 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    476 	register short *temp;
    477 	short  *P;
    478 
    479 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    480 	RF_Malloc(P, bytesPerEU, (short *));
    481 	RF_Malloc(temp, bytesPerEU, (short *));
    482 #elif RF_EO_MATRIX_DIM == 17
    483 	int     longsPerEU = bytesPerEU / sizeof(long);
    484 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    485 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    486 	register long *temp;
    487 	long   *P;
    488 
    489 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    490 	RF_Malloc(P, bytesPerEU, (long *));
    491 	RF_Malloc(temp, bytesPerEU, (long *));
    492 #endif
    493 	RF_ASSERT(*((long *) dest[0]) == 0);
    494 	RF_ASSERT(*((long *) dest[1]) == 0);
    495 	bzero((char *) P, bytesPerEU);
    496 	bzero((char *) temp, bytesPerEU);
    497 	RF_ASSERT(*P == 0);
    498 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    499 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    500 	 * the Ref. paper by Blaum, et al 1993  */
    501 	for (i = 0; i < numRowInEncMatix; i++)
    502 		for (k = 0; k < longsPerEU; k++) {
    503 #if RF_EO_MATRIX_DIM > 17
    504 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    505 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    506 #elif RF_EO_MATRIX_DIM == 17
    507 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    508 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    509 #endif
    510 			P[k] ^= *ebuf_current;
    511 			P[k] ^= *pbuf_current;
    512 		}
    513 	RF_ASSERT(fcol[0] != fcol[1]);
    514 	if (fcol[0] < fcol[1]) {
    515 #if RF_EO_MATRIX_DIM > 17
    516 		dest_smaller = (short *) (dest[0]);
    517 		dest_larger = (short *) (dest[1]);
    518 #elif RF_EO_MATRIX_DIM == 17
    519 		dest_smaller = (long *) (dest[0]);
    520 		dest_larger = (long *) (dest[1]);
    521 #endif
    522 		f1 = fcol[0];
    523 		f2 = fcol[1];
    524 	} else {
    525 #if RF_EO_MATRIX_DIM > 17
    526 		dest_smaller = (short *) (dest[1]);
    527 		dest_larger = (short *) (dest[0]);
    528 #elif RF_EO_MATRIX_DIM == 17
    529 		dest_smaller = (long *) (dest[1]);
    530 		dest_larger = (long *) (dest[0]);
    531 #endif
    532 		f1 = fcol[1];
    533 		f2 = fcol[0];
    534 	}
    535 	row = (RF_EO_MATRIX_DIM) - 1;
    536 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    537 #if RF_EO_MATRIX_DIM > 17
    538 		dest_larger_current = dest_larger + row * shortsPerEU;
    539 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    540 #elif RF_EO_MATRIX_DIM == 17
    541 		dest_larger_current = dest_larger + row * longsPerEU;
    542 		dest_smaller_current = dest_smaller + row * longsPerEU;
    543 #endif
    544 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    545 		       which is the failed data in the colume which has smaller col index. **/
    546 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    547 		for (j = 0; j < numDataCol; j++) {
    548 			if (j == f1 || j == f2)
    549 				continue;
    550 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    551 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    552 #if RF_EO_MATRIX_DIM > 17
    553 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    554 				for (k = 0; k < shortsPerEU; k++)
    555 					temp[k] ^= *(rrdbuf_current + k);
    556 #elif RF_EO_MATRIX_DIM == 17
    557 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    558 				for (k = 0; k < longsPerEU; k++)
    559 					temp[k] ^= *(rrdbuf_current + k);
    560 #endif
    561 			}
    562 		}
    563 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    564 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    565 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    566 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    567 		 * diagonal) ^ (failed 2)       */
    568 
    569 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    570 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    571 #if RF_EO_MATRIX_DIM > 17
    572 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    573 			for (k = 0; k < shortsPerEU; k++)
    574 				temp[k] ^= *(ebuf_current + k);
    575 #elif RF_EO_MATRIX_DIM == 17
    576 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    577 			for (k = 0; k < longsPerEU; k++)
    578 				temp[k] ^= *(ebuf_current + k);
    579 #endif
    580 		}
    581 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    582 		 * proved to be actually  (principle diagonal)  After this
    583 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    584 #if RF_EO_MATRIX_DIM > 17
    585 		for (k = 0; k < shortsPerEU; k++)
    586 			temp[k] ^= P[k];
    587 		/* Put the data to the destination buffer                              */
    588 		for (k = 0; k < shortsPerEU; k++)
    589 			dest_larger_current[k] = temp[k];
    590 #elif RF_EO_MATRIX_DIM == 17
    591 		for (k = 0; k < longsPerEU; k++)
    592 			temp[k] ^= P[k];
    593 		/* Put the data to the destination buffer                              */
    594 		for (k = 0; k < longsPerEU; k++)
    595 			dest_larger_current[k] = temp[k];
    596 #endif
    597 
    598 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    599 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    600 		 * columes    */
    601 		for (j = 0; j < numDataCol; j++) {
    602 			if (j == f1 || j == f2)
    603 				continue;
    604 #if RF_EO_MATRIX_DIM > 17
    605 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    606 			for (k = 0; k < shortsPerEU; k++)
    607 				temp[k] ^= *(rrdbuf_current + k);
    608 #elif RF_EO_MATRIX_DIM == 17
    609 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    610 			for (k = 0; k < longsPerEU; k++)
    611 				temp[k] ^= *(rrdbuf_current + k);
    612 #endif
    613 		}
    614 		/* step 2: ^A(row,m-1) */
    615 		/* step 3: Put the data to the destination buffer                             	 */
    616 #if RF_EO_MATRIX_DIM > 17
    617 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    618 		for (k = 0; k < shortsPerEU; k++)
    619 			temp[k] ^= *(pbuf_current + k);
    620 		for (k = 0; k < shortsPerEU; k++)
    621 			dest_smaller_current[k] = temp[k];
    622 #elif RF_EO_MATRIX_DIM == 17
    623 		pbuf_current = (long *) pbuf + longsPerEU * row;
    624 		for (k = 0; k < longsPerEU; k++)
    625 			temp[k] ^= *(pbuf_current + k);
    626 		for (k = 0; k < longsPerEU; k++)
    627 			dest_smaller_current[k] = temp[k];
    628 #endif
    629 		count++;
    630 	}
    631 	/* Check if all Encoding Unit in the data buffer have been decoded,
    632 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    633 	 * this algorithm will covered all buffer 				 */
    634 	RF_ASSERT(count == numRowInEncMatix);
    635 	RF_Free((char *) P, bytesPerEU);
    636 	RF_Free((char *) temp, bytesPerEU);
    637 }
    638 
    639 
    640 /***************************************************************************************
    641 * 	This function is called by double degragded read
    642 * 	EO_200_CreateReadDAG
    643 *
    644 ***************************************************************************************/
    645 int
    646 rf_EvenOddDoubleRecoveryFunc(node)
    647 	RF_DagNode_t *node;
    648 {
    649 	int     ndataParam = 0;
    650 	int     np = node->numParams;
    651 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    652 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    653 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    654 	int     i, prm, sector, nresults = node->numResults;
    655 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    656 	unsigned sosAddr;
    657 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    658 							 * memory is allocated */
    659 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    660 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    661 	        npda;
    662 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    663 	char  **buf, *ebuf, *pbuf, *dest[2];
    664 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
    665 	RF_SectorNum_t startSector, endSector;
    666 	RF_Etimer_t timer;
    667 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    668 
    669 	RF_ETIMER_START(timer);
    670 
    671 	/* Find out the number of parameters which are pdas for data
    672 	 * information */
    673 	for (i = 0; i <= np; i++)
    674 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    675 			ndataParam = i;
    676 			break;
    677 		}
    678 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    679 	if (ndataParam != 0) {
    680 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
    681 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
    682 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
    683 	}
    684 	if (asmap->failedPDAs[1] &&
    685 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    686 		RF_ASSERT(0);	/* currently, no support for this situation */
    687 		ppda = node->params[np - 6].p;
    688 		ppda2 = node->params[np - 5].p;
    689 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    690 		epda = node->params[np - 4].p;
    691 		epda2 = node->params[np - 3].p;
    692 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    693 		two = 1;
    694 	} else {
    695 		ppda = node->params[np - 4].p;
    696 		epda = node->params[np - 3].p;
    697 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    698 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    699 		RF_ASSERT(psuoff == esuoff);
    700 	}
    701 	/*
    702             the followings have three goals:
    703             1. determine the startSector to begin decoding and endSector to end decoding.
    704             2. determine the colume numbers of the two failed disks.
    705             3. determine the offset and end offset of the access within each failed stripe unit.
    706          */
    707 	if (nresults == 1) {
    708 		/* find the startSector to begin decoding */
    709 		pda = node->results[0];
    710 		bzero(pda->bufPtr, bytesPerSector * pda->numSector);
    711 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    712 		fsuend[0] = fsuoff[0] + pda->numSector;
    713 		startSector = fsuoff[0];
    714 		endSector = fsuend[0];
    715 
    716 		/* find out the the column of failed disk being accessed */
    717 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    718 
    719 		/* find out the other failed colume not accessed */
    720 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    721 		for (i = 0; i < numDataCol; i++) {
    722 			npda.raidAddress = sosAddr + (i * secPerSU);
    723 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    724 			/* skip over dead disks */
    725 			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    726 				if (i != fcol[0])
    727 					break;
    728 		}
    729 		RF_ASSERT(i < numDataCol);
    730 		fcol[1] = i;
    731 	} else {
    732 		RF_ASSERT(nresults == 2);
    733 		pda0 = node->results[0];
    734 		bzero(pda0->bufPtr, bytesPerSector * pda0->numSector);
    735 		pda1 = node->results[1];
    736 		bzero(pda1->bufPtr, bytesPerSector * pda1->numSector);
    737 		/* determine the failed colume numbers of the two failed
    738 		 * disks. */
    739 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    740 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    741 		/* determine the offset and end offset of the access within
    742 		 * each failed stripe unit. */
    743 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    744 		fsuend[0] = fsuoff[0] + pda0->numSector;
    745 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    746 		fsuend[1] = fsuoff[1] + pda1->numSector;
    747 		/* determine the startSector to begin decoding */
    748 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    749 		/* determine the endSector to end decoding */
    750 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    751 	}
    752 	/*
    753 	      assign the beginning sector and the end sector for each parameter
    754 	      find out the corresponding colume # for each parameter
    755         */
    756 	for (prm = 0; prm < ndataParam; prm++) {
    757 		pda = node->params[prm].p;
    758 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    759 		suend[prm] = suoff[prm] + pda->numSector;
    760 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    761 	}
    762 	/* 'sector' is the sector for the current decoding algorithm. For each
    763 	 * sector in the failed SU, find out the corresponding parameters that
    764 	 * cover the current sector and that are needed for decoding of this
    765 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    766 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    767 	 * size. */
    768 	for (sector = startSector; sector < endSector; sector++) {
    769 		if (nresults == 2)
    770 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    771 				continue;
    772 		for (prm = 0; prm < ndataParam; prm++)
    773 			if (suoff[prm] <= sector && sector < suend[prm])
    774 				buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    775 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    776 		/* find out if sector is in the shadow of any accessed failed
    777 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    778 		 * position of the buffer corresponding to failed SUs. if no,
    779 		 * malloc a temporary space of a sector in size for
    780 		 * destination of decoding. */
    781 		RF_ASSERT(nresults == 1 || nresults == 2);
    782 		if (nresults == 1) {
    783 			dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    784 			/* Always malloc temp buffer to dest[1]  */
    785 			RF_Malloc(dest[1], bytesPerSector, (char *));
    786 			bzero(dest[1], bytesPerSector);
    787 			mallc_two = 1;
    788 		} else {
    789 			if (fsuoff[0] <= sector && sector < fsuend[0])
    790 				dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    791 			else {
    792 				RF_Malloc(dest[0], bytesPerSector, (char *));
    793 				bzero(dest[0], bytesPerSector);
    794 				mallc_one = 1;
    795 			}
    796 			if (fsuoff[1] <= sector && sector < fsuend[1])
    797 				dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    798 			else {
    799 				RF_Malloc(dest[1], bytesPerSector, (char *));
    800 				bzero(dest[1], bytesPerSector);
    801 				mallc_two = 1;
    802 			}
    803 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    804 		}
    805 		pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    806 		ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    807 		/*
    808 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    809 	         * one sector to destination.
    810 	         */
    811 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    812 		/* free all allocated memory, and mark flag to indicate no
    813 		 * memory is being allocated */
    814 		if (mallc_one == 1)
    815 			RF_Free(dest[0], bytesPerSector);
    816 		if (mallc_two == 1)
    817 			RF_Free(dest[1], bytesPerSector);
    818 		mallc_one = mallc_two = 0;
    819 	}
    820 	RF_Free(buf, numDataCol * sizeof(char *));
    821 	if (ndataParam != 0) {
    822 		RF_Free(suoff, ndataParam * sizeof(long));
    823 		RF_Free(suend, ndataParam * sizeof(long));
    824 		RF_Free(prmToCol, ndataParam * sizeof(long));
    825 	}
    826 	RF_ETIMER_STOP(timer);
    827 	RF_ETIMER_EVAL(timer);
    828 	if (tracerec) {
    829 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    830 	}
    831 	rf_GenericWakeupFunc(node, 0);
    832 #if 1
    833 	return (0);		/* XXX is this even close!!?!?!!? GO */
    834 #endif
    835 }
    836 
    837 
    838 /* currently, only access of one of the two failed SU is allowed in this function.
    839  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    840  * many accesses of single stripe unit.
    841  */
    842 
    843 int
    844 rf_EOWriteDoubleRecoveryFunc(node)
    845 	RF_DagNode_t *node;
    846 {
    847 	int     np = node->numParams;
    848 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    849 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    850 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    851 	RF_SectorNum_t sector;
    852 	RF_RowCol_t col, scol;
    853 	int     prm, i, j;
    854 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    855 	unsigned sosAddr;
    856 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    857 	RF_int64 numbytes;
    858 	RF_SectorNum_t startSector, endSector;
    859 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    860 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    861 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    862 				 * buffer storing data read from col0, col1,
    863 				 * col2 */
    864 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    865 	RF_Etimer_t timer;
    866 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    867 
    868 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    869 						 * case, the other failed SU
    870 						 * is not being accessed */
    871 	RF_ETIMER_START(timer);
    872 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    873 
    874 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    875 				 * and [1] are Ppda and Epda  */
    876 	epda = node->results[1];
    877 	fpda = asmap->failedPDAs[0];
    878 
    879 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    880 	/* determine the startSector and endSector for decoding */
    881 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    882 	endSector = startSector + fpda->numSector;
    883 	/* Assign buf[col] pointers to point to each non-failed colume  and
    884 	 * initialize the pbuf and ebuf to point at the beginning of each
    885 	 * source buffers and destination buffers */
    886 	for (prm = 0; prm < numDataCol - 2; prm++) {
    887 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    888 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    889 		buf[col] = pda->bufPtr;
    890 	}
    891 	/* pbuf and ebuf:  they will change values as double recovery decoding
    892 	 * goes on */
    893 	pbuf = ppda->bufPtr;
    894 	ebuf = epda->bufPtr;
    895 	/* find out the logical colume numbers in the encoding matrix of the
    896 	 * two failed columes */
    897 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    898 
    899 	/* find out the other failed colume not accessed this time */
    900 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    901 	for (i = 0; i < numDataCol; i++) {
    902 		npda.raidAddress = sosAddr + (i * secPerSU);
    903 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    904 		/* skip over dead disks */
    905 		if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    906 			if (i != fcol[0])
    907 				break;
    908 	}
    909 	RF_ASSERT(i < numDataCol);
    910 	fcol[1] = i;
    911 	/* assign temporary space to put recovered failed SU */
    912 	numbytes = fpda->numSector * bytesPerSector;
    913 	RF_Malloc(olddata[0], numbytes, (char *));
    914 	RF_Malloc(olddata[1], numbytes, (char *));
    915 	dest[0] = olddata[0];
    916 	dest[1] = olddata[1];
    917 	bzero(olddata[0], numbytes);
    918 	bzero(olddata[1], numbytes);
    919 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    920 	 * have already pointed at the beginning of each source buffers and
    921 	 * destination buffers */
    922 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    923 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    924 		for (j = 0; j < numDataCol; j++)
    925 			if ((j != fcol[0]) && (j != fcol[1]))
    926 				buf[j] += bytesPerSector;
    927 		dest[0] += bytesPerSector;
    928 		dest[1] += bytesPerSector;
    929 		ebuf += bytesPerSector;
    930 		pbuf += bytesPerSector;
    931 	}
    932 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    933 	 * data. With new writing data and this old data, use small write to
    934 	 * calculate the new redundant informations */
    935 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    936 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    937 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    938 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    939 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    940 	 * wudNodes; For current implementation, we assume the simplest case:
    941 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    942 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    943 	 * data to be writen to the failed disk. We first bxor the new data
    944 	 * into the old recovered data, then do the same things as small
    945 	 * write. */
    946 
    947 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
    948 	/* do new 'E' calculation  */
    949 	/* find out the corresponding colume in encoding matrix for write
    950 	 * colume to be encoded into redundant disk 'E' */
    951 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    952 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    953 	 * buffer pointer               */
    954 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    955 
    956 	/* do new 'P' calculation  */
    957 	rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
    958 	/* Free the allocated buffer  */
    959 	RF_Free(olddata[0], numbytes);
    960 	RF_Free(olddata[1], numbytes);
    961 	RF_Free(buf, numDataCol * sizeof(char *));
    962 
    963 	RF_ETIMER_STOP(timer);
    964 	RF_ETIMER_EVAL(timer);
    965 	if (tracerec) {
    966 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    967 	}
    968 	rf_GenericWakeupFunc(node, 0);
    969 	return (0);
    970 }
    971