Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.6
      1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: ChangMing Wu
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID-EVENODD  architecture.
     31  */
     32 
     33 #include "rf_types.h"
     34 #include "rf_raid.h"
     35 #include "rf_dag.h"
     36 #include "rf_dagffrd.h"
     37 #include "rf_dagffwr.h"
     38 #include "rf_dagdegrd.h"
     39 #include "rf_dagdegwr.h"
     40 #include "rf_dagutils.h"
     41 #include "rf_dagfuncs.h"
     42 #include "rf_etimer.h"
     43 #include "rf_general.h"
     44 #include "rf_configure.h"
     45 #include "rf_parityscan.h"
     46 #include "rf_evenodd.h"
     47 #include "rf_evenodd_dagfuncs.h"
     48 
     49 /* These redundant functions are for small write */
     50 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     51 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     52 /* These redundant functions are for degraded read */
     53 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     54 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
     55 /**********************************************************************************************
     56  *   the following encoding node functions is used in  EO_000_CreateLargeWriteDAG
     57  **********************************************************************************************/
     58 int
     59 rf_RegularPEFunc(node)
     60 	RF_DagNode_t *node;
     61 {
     62 	rf_RegularESubroutine(node, node->results[1]);
     63 	rf_RegularXorFunc(node);/* does the wakeup here! */
     64 #if 1
     65 	return (0);		/* XXX This was missing... GO */
     66 #endif
     67 }
     68 
     69 
     70 /************************************************************************************************
     71  *  For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to
     72  *  be used. The previous case is when write access at least sectors of full stripe unit.
     73  *  The later function is used when the write access two stripe units but with total sectors
     74  *  less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected
     75  *  areas in their stripe unit and  parity write and 'E' write are both devided into two distinct
     76  *  writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5
     77  ************************************************************************************************/
     78 
     79 /* Algorithm:
     80      1. Store the difference of old data and new data in the Rod buffer.
     81      2. then encode this buffer into the buffer which already have old 'E' information inside it,
     82 	the result can be shown to be the new 'E' information.
     83      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
     84    Here we have another alternative: to allocate a temporary buffer for storing the difference of
     85    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
     86    take the same speed as the previous, and need more memory.
     87 */
     88 int
     89 rf_RegularONEFunc(node)
     90 	RF_DagNode_t *node;
     91 {
     92 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
     93 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
     94 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
     95 								 * where you can find
     96 								 * e-pda */
     97 	int     i, k, retcode = 0;
     98 	int     suoffset, length;
     99 	RF_RowCol_t scol;
    100 	char   *srcbuf, *destbuf;
    101 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    102 	RF_Etimer_t timer;
    103 	RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    104 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);	/* generally zero  */
    105 
    106 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    107 	RF_ASSERT(ESUOffset == 0);
    108 
    109 	RF_ETIMER_START(timer);
    110 
    111 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    112 	 * new data is stored in Rod buffer */
    113 	for (k = 0; k < EpdaIndex; k += 2) {
    114 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    115 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    116 	}
    117 	/* Start to encoding the buffer storing the difference of old data and
    118 	 * new data into 'E' buffer  */
    119 	for (i = 0; i < EpdaIndex; i += 2)
    120 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    121 									 * of E */
    122 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    123 			srcbuf = (char *) node->params[i + 1].p;
    124 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    125 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    126 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    127 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    128 		}
    129 	/* Recover the original old data to be used by parity encoding
    130 	 * function in XorNode */
    131 	for (k = 0; k < EpdaIndex; k += 2) {
    132 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    133 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    134 	}
    135 	RF_ETIMER_STOP(timer);
    136 	RF_ETIMER_EVAL(timer);
    137 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    138 	rf_GenericWakeupFunc(node, 0);
    139 #if 1
    140 	return (0);		/* XXX this was missing.. GO */
    141 #endif
    142 }
    143 
    144 int
    145 rf_SimpleONEFunc(node)
    146 	RF_DagNode_t *node;
    147 {
    148 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    149 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    150 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    151 	int     retcode = 0;
    152 	char   *srcbuf, *destbuf;
    153 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    154 	int     length;
    155 	RF_RowCol_t scol;
    156 	RF_Etimer_t timer;
    157 
    158 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    159 	if (node->dagHdr->status == rf_enable) {
    160 		RF_ETIMER_START(timer);
    161 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    162 														 * writeDataNodes */
    163 		/* bxor to buffer of readDataNodes */
    164 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    165 		/* find out the corresponding colume in encoding matrix for
    166 		 * write colume to be encoded into redundant disk 'E' */
    167 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    168 		srcbuf = node->params[1].p;
    169 		destbuf = node->params[3].p;
    170 		/* Start encoding process */
    171 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    172 		rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    173 		RF_ETIMER_STOP(timer);
    174 		RF_ETIMER_EVAL(timer);
    175 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    176 
    177 	}
    178 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    179 							 * explicitly since no
    180 							 * I/O in this node */
    181 }
    182 
    183 
    184 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    185 void
    186 rf_RegularESubroutine(node, ebuf)
    187 	RF_DagNode_t *node;
    188 	char   *ebuf;
    189 {
    190 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    191 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    192 	RF_PhysDiskAddr_t *pda;
    193 	int     i, suoffset;
    194 	RF_RowCol_t scol;
    195 	char   *srcbuf, *destbuf;
    196 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    197 	RF_Etimer_t timer;
    198 
    199 	RF_ETIMER_START(timer);
    200 	for (i = 0; i < node->numParams - 2; i += 2) {
    201 		RF_ASSERT(node->params[i + 1].p != ebuf);
    202 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    203 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    204 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    205 		srcbuf = (char *) node->params[i + 1].p;
    206 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    207 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    208 	}
    209 	RF_ETIMER_STOP(timer);
    210 	RF_ETIMER_EVAL(timer);
    211 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    212 }
    213 
    214 
    215 /*******************************************************************************************
    216  *			 Used in  EO_001_CreateLargeWriteDAG
    217  ******************************************************************************************/
    218 int
    219 rf_RegularEFunc(node)
    220 	RF_DagNode_t *node;
    221 {
    222 	rf_RegularESubroutine(node, node->results[0]);
    223 	rf_GenericWakeupFunc(node, 0);
    224 #if 1
    225 	return (0);		/* XXX this was missing?.. GO */
    226 #endif
    227 }
    228 /*******************************************************************************************
    229  * This degraded function allow only two case:
    230  *  1. when write access the full failed stripe unit, then the access can be more than
    231  *     one tripe units.
    232  *  2. when write access only part of the failed SU, we assume accesses of more than
    233  *     one stripe unit is not allowed so that the write can be dealt with like a
    234  *     large write.
    235  *  The following function is based on these assumptions. So except in the second case,
    236  *  it looks the same as a large write encodeing function. But this is not exactly the
    237  *  normal way for doing a degraded write, since raidframe have to break cases of access
    238  *  other than the above two into smaller accesses. We may have to change
    239  *  DegrESubroutin in the future.
    240  *******************************************************************************************/
    241 void
    242 rf_DegrESubroutine(node, ebuf)
    243 	RF_DagNode_t *node;
    244 	char   *ebuf;
    245 {
    246 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    247 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    248 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    249 	RF_PhysDiskAddr_t *pda;
    250 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    251 	RF_RowCol_t scol;
    252 	char   *srcbuf, *destbuf;
    253 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    254 	RF_Etimer_t timer;
    255 
    256 	RF_ETIMER_START(timer);
    257 	for (i = 0; i < node->numParams - 2; i += 2) {
    258 		RF_ASSERT(node->params[i + 1].p != ebuf);
    259 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    260 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    261 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    262 		srcbuf = (char *) node->params[i + 1].p;
    263 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    264 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    265 	}
    266 
    267 	RF_ETIMER_STOP(timer);
    268 	RF_ETIMER_EVAL(timer);
    269 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    270 }
    271 
    272 
    273 /**************************************************************************************
    274  * This function is used in case where one data disk failed and both redundant disks
    275  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    276  * failed in the stripe but not accessed at this time, then we should, instead, use
    277  * the rf_EOWriteDoubleRecoveryFunc().
    278  **************************************************************************************/
    279 int
    280 rf_Degraded_100_EOFunc(node)
    281 	RF_DagNode_t *node;
    282 {
    283 	rf_DegrESubroutine(node, node->results[1]);
    284 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    285 #if 1
    286 	return (0);		/* XXX this was missing... SHould these be
    287 				 * void functions??? GO */
    288 #endif
    289 }
    290 /**************************************************************************************
    291  * This function is to encode one sector in one of the data disks to the E disk.
    292  * However, in evenodd this function can also be used as decoding function to recover
    293  * data from dead disk in the case of parity failure and a single data failure.
    294  **************************************************************************************/
    295 void
    296 rf_e_EncOneSect(
    297     RF_RowCol_t srcLogicCol,
    298     char *srcSecbuf,
    299     RF_RowCol_t destLogicCol,
    300     char *destSecbuf,
    301     int bytesPerSector)
    302 {
    303 	int     S_index;	/* index of the EU in the src col which need
    304 				 * be Xored into all EUs in a dest sector */
    305 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    306 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    307 					 * the destination colume of encoding
    308 					 * matrix */
    309 	        indexInSrc;	/* row index of an encoding unit in the source
    310 				 * colume used for recovery */
    311 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    312 
    313 #if RF_EO_MATRIX_DIM > 17
    314 	int     shortsPerEU = bytesPerEU / sizeof(short);
    315 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    316 	short temp1;
    317 #elif RF_EO_MATRIX_DIM == 17
    318 	int     longsPerEU = bytesPerEU / sizeof(long);
    319 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    320 	long temp1;
    321 #endif
    322 
    323 #if RF_EO_MATRIX_DIM > 17
    324 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    325 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    326 #elif RF_EO_MATRIX_DIM == 17
    327 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    328 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    329 #endif
    330 
    331 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    332 #if RF_EO_MATRIX_DIM > 17
    333 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    334 #elif RF_EO_MATRIX_DIM == 17
    335 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    336 #endif
    337 
    338 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    339 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    340 
    341 #if RF_EO_MATRIX_DIM > 17
    342 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    343 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    344 		for (j = 0; j < shortsPerEU; j++) {
    345 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    346 			/* note: S_index won't be at the end row for any src
    347 			 * col! */
    348 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    349 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    350 			/* if indexInSrc is at the end row, ie.
    351 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    352 			else
    353 				destShortBuf[j] = temp1;
    354 		}
    355 
    356 #elif RF_EO_MATRIX_DIM == 17
    357 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    358 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    359 		for (j = 0; j < longsPerEU; j++) {
    360 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    361 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    362 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    363 			else
    364 				destLongBuf[j] = temp1;
    365 		}
    366 #endif
    367 	}
    368 }
    369 
    370 void
    371 rf_e_encToBuf(
    372     RF_Raid_t * raidPtr,
    373     RF_RowCol_t srcLogicCol,
    374     char *srcbuf,
    375     RF_RowCol_t destLogicCol,
    376     char *destbuf,
    377     int numSector)
    378 {
    379 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    380 
    381 	for (i = 0; i < numSector; i++) {
    382 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    383 		srcbuf += bytesPerSector;
    384 		destbuf += bytesPerSector;
    385 	}
    386 }
    387 /**************************************************************************************
    388  * when parity die and one data die, We use second redundant information, 'E',
    389  * to recover the data in dead disk. This function is used in the recovery node of
    390  * for EO_110_CreateReadDAG
    391  **************************************************************************************/
    392 int
    393 rf_RecoveryEFunc(node)
    394 	RF_DagNode_t *node;
    395 {
    396 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    397 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    398 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    399 	RF_RowCol_t scol,	/* source logical column */
    400 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    401 									 * failed SU */
    402 	int     i;
    403 	RF_PhysDiskAddr_t *pda;
    404 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    405 	char   *srcbuf, *destbuf;
    406 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    407 	RF_Etimer_t timer;
    408 
    409 	bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    410 	if (node->dagHdr->status == rf_enable) {
    411 		RF_ETIMER_START(timer);
    412 		for (i = 0; i < node->numParams - 2; i += 2)
    413 			if (node->params[i + 1].p != node->results[0]) {
    414 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    415 				if (i == node->numParams - 4)
    416 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    417 									 * redundant E */
    418 				else
    419 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    420 				srcbuf = (char *) node->params[i + 1].p;
    421 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    422 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    423 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    424 			}
    425 		RF_ETIMER_STOP(timer);
    426 		RF_ETIMER_EVAL(timer);
    427 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    428 	}
    429 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    430 }
    431 /**************************************************************************************
    432  * This function is used in the case where one data and the parity have filed.
    433  * (in EO_110_CreateWriteDAG )
    434  **************************************************************************************/
    435 int
    436 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    437 {
    438 	rf_DegrESubroutine(node, node->results[0]);
    439 	rf_GenericWakeupFunc(node, 0);
    440 #if 1
    441 	return (0);		/* XXX Yet another one!! GO */
    442 #endif
    443 }
    444 
    445 
    446 
    447 /**************************************************************************************
    448  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    449  **************************************************************************************/
    450 
    451 void
    452 rf_doubleEOdecode(
    453     RF_Raid_t * raidPtr,
    454     char **rrdbuf,
    455     char **dest,
    456     RF_RowCol_t * fcol,
    457     char *pbuf,
    458     char *ebuf)
    459 {
    460 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    461 	int     i, j, k, f1, f2, row;
    462 	int     rrdrow, erow, count = 0;
    463 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    464 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    465 #if 0
    466 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    467 #endif
    468 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    469 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    470 	int     numDataCol = layoutPtr->numDataCol;
    471 #if RF_EO_MATRIX_DIM > 17
    472 	int     shortsPerEU = bytesPerEU / sizeof(short);
    473 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    474 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    475 	short *temp;
    476 	short  *P;
    477 
    478 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    479 	RF_Malloc(P, bytesPerEU, (short *));
    480 	RF_Malloc(temp, bytesPerEU, (short *));
    481 #elif RF_EO_MATRIX_DIM == 17
    482 	int     longsPerEU = bytesPerEU / sizeof(long);
    483 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    484 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    485 	long *temp;
    486 	long   *P;
    487 
    488 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    489 	RF_Malloc(P, bytesPerEU, (long *));
    490 	RF_Malloc(temp, bytesPerEU, (long *));
    491 #endif
    492 	RF_ASSERT(*((long *) dest[0]) == 0);
    493 	RF_ASSERT(*((long *) dest[1]) == 0);
    494 	bzero((char *) P, bytesPerEU);
    495 	bzero((char *) temp, bytesPerEU);
    496 	RF_ASSERT(*P == 0);
    497 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    498 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    499 	 * the Ref. paper by Blaum, et al 1993  */
    500 	for (i = 0; i < numRowInEncMatix; i++)
    501 		for (k = 0; k < longsPerEU; k++) {
    502 #if RF_EO_MATRIX_DIM > 17
    503 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    504 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    505 #elif RF_EO_MATRIX_DIM == 17
    506 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    507 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    508 #endif
    509 			P[k] ^= *ebuf_current;
    510 			P[k] ^= *pbuf_current;
    511 		}
    512 	RF_ASSERT(fcol[0] != fcol[1]);
    513 	if (fcol[0] < fcol[1]) {
    514 #if RF_EO_MATRIX_DIM > 17
    515 		dest_smaller = (short *) (dest[0]);
    516 		dest_larger = (short *) (dest[1]);
    517 #elif RF_EO_MATRIX_DIM == 17
    518 		dest_smaller = (long *) (dest[0]);
    519 		dest_larger = (long *) (dest[1]);
    520 #endif
    521 		f1 = fcol[0];
    522 		f2 = fcol[1];
    523 	} else {
    524 #if RF_EO_MATRIX_DIM > 17
    525 		dest_smaller = (short *) (dest[1]);
    526 		dest_larger = (short *) (dest[0]);
    527 #elif RF_EO_MATRIX_DIM == 17
    528 		dest_smaller = (long *) (dest[1]);
    529 		dest_larger = (long *) (dest[0]);
    530 #endif
    531 		f1 = fcol[1];
    532 		f2 = fcol[0];
    533 	}
    534 	row = (RF_EO_MATRIX_DIM) - 1;
    535 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    536 #if RF_EO_MATRIX_DIM > 17
    537 		dest_larger_current = dest_larger + row * shortsPerEU;
    538 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    539 #elif RF_EO_MATRIX_DIM == 17
    540 		dest_larger_current = dest_larger + row * longsPerEU;
    541 		dest_smaller_current = dest_smaller + row * longsPerEU;
    542 #endif
    543 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    544 		       which is the failed data in the colume which has smaller col index. **/
    545 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    546 		for (j = 0; j < numDataCol; j++) {
    547 			if (j == f1 || j == f2)
    548 				continue;
    549 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    550 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    551 #if RF_EO_MATRIX_DIM > 17
    552 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    553 				for (k = 0; k < shortsPerEU; k++)
    554 					temp[k] ^= *(rrdbuf_current + k);
    555 #elif RF_EO_MATRIX_DIM == 17
    556 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    557 				for (k = 0; k < longsPerEU; k++)
    558 					temp[k] ^= *(rrdbuf_current + k);
    559 #endif
    560 			}
    561 		}
    562 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    563 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    564 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    565 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    566 		 * diagonal) ^ (failed 2)       */
    567 
    568 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    569 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    570 #if RF_EO_MATRIX_DIM > 17
    571 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    572 			for (k = 0; k < shortsPerEU; k++)
    573 				temp[k] ^= *(ebuf_current + k);
    574 #elif RF_EO_MATRIX_DIM == 17
    575 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    576 			for (k = 0; k < longsPerEU; k++)
    577 				temp[k] ^= *(ebuf_current + k);
    578 #endif
    579 		}
    580 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    581 		 * proved to be actually  (principle diagonal)  After this
    582 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    583 #if RF_EO_MATRIX_DIM > 17
    584 		for (k = 0; k < shortsPerEU; k++)
    585 			temp[k] ^= P[k];
    586 		/* Put the data to the destination buffer                              */
    587 		for (k = 0; k < shortsPerEU; k++)
    588 			dest_larger_current[k] = temp[k];
    589 #elif RF_EO_MATRIX_DIM == 17
    590 		for (k = 0; k < longsPerEU; k++)
    591 			temp[k] ^= P[k];
    592 		/* Put the data to the destination buffer                              */
    593 		for (k = 0; k < longsPerEU; k++)
    594 			dest_larger_current[k] = temp[k];
    595 #endif
    596 
    597 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    598 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    599 		 * columes    */
    600 		for (j = 0; j < numDataCol; j++) {
    601 			if (j == f1 || j == f2)
    602 				continue;
    603 #if RF_EO_MATRIX_DIM > 17
    604 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    605 			for (k = 0; k < shortsPerEU; k++)
    606 				temp[k] ^= *(rrdbuf_current + k);
    607 #elif RF_EO_MATRIX_DIM == 17
    608 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    609 			for (k = 0; k < longsPerEU; k++)
    610 				temp[k] ^= *(rrdbuf_current + k);
    611 #endif
    612 		}
    613 		/* step 2: ^A(row,m-1) */
    614 		/* step 3: Put the data to the destination buffer                             	 */
    615 #if RF_EO_MATRIX_DIM > 17
    616 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    617 		for (k = 0; k < shortsPerEU; k++)
    618 			temp[k] ^= *(pbuf_current + k);
    619 		for (k = 0; k < shortsPerEU; k++)
    620 			dest_smaller_current[k] = temp[k];
    621 #elif RF_EO_MATRIX_DIM == 17
    622 		pbuf_current = (long *) pbuf + longsPerEU * row;
    623 		for (k = 0; k < longsPerEU; k++)
    624 			temp[k] ^= *(pbuf_current + k);
    625 		for (k = 0; k < longsPerEU; k++)
    626 			dest_smaller_current[k] = temp[k];
    627 #endif
    628 		count++;
    629 	}
    630 	/* Check if all Encoding Unit in the data buffer have been decoded,
    631 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    632 	 * this algorithm will covered all buffer 				 */
    633 	RF_ASSERT(count == numRowInEncMatix);
    634 	RF_Free((char *) P, bytesPerEU);
    635 	RF_Free((char *) temp, bytesPerEU);
    636 }
    637 
    638 
    639 /***************************************************************************************
    640 * 	This function is called by double degraded read
    641 * 	EO_200_CreateReadDAG
    642 *
    643 ***************************************************************************************/
    644 int
    645 rf_EvenOddDoubleRecoveryFunc(node)
    646 	RF_DagNode_t *node;
    647 {
    648 	int     ndataParam = 0;
    649 	int     np = node->numParams;
    650 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    651 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    652 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    653 	int     i, prm, sector, nresults = node->numResults;
    654 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    655 	unsigned sosAddr;
    656 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    657 							 * memory is allocated */
    658 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    659 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    660 	        npda;
    661 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    662 	char  **buf, *ebuf, *pbuf, *dest[2];
    663 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
    664 	RF_SectorNum_t startSector, endSector;
    665 	RF_Etimer_t timer;
    666 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    667 
    668 	RF_ETIMER_START(timer);
    669 
    670 	/* Find out the number of parameters which are pdas for data
    671 	 * information */
    672 	for (i = 0; i <= np; i++)
    673 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    674 			ndataParam = i;
    675 			break;
    676 		}
    677 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    678 	if (ndataParam != 0) {
    679 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
    680 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
    681 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
    682 	}
    683 	if (asmap->failedPDAs[1] &&
    684 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    685 		RF_ASSERT(0);	/* currently, no support for this situation */
    686 		ppda = node->params[np - 6].p;
    687 		ppda2 = node->params[np - 5].p;
    688 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    689 		epda = node->params[np - 4].p;
    690 		epda2 = node->params[np - 3].p;
    691 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    692 		two = 1;
    693 	} else {
    694 		ppda = node->params[np - 4].p;
    695 		epda = node->params[np - 3].p;
    696 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    697 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    698 		RF_ASSERT(psuoff == esuoff);
    699 	}
    700 	/*
    701             the followings have three goals:
    702             1. determine the startSector to begin decoding and endSector to end decoding.
    703             2. determine the colume numbers of the two failed disks.
    704             3. determine the offset and end offset of the access within each failed stripe unit.
    705          */
    706 	if (nresults == 1) {
    707 		/* find the startSector to begin decoding */
    708 		pda = node->results[0];
    709 		bzero(pda->bufPtr, bytesPerSector * pda->numSector);
    710 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    711 		fsuend[0] = fsuoff[0] + pda->numSector;
    712 		startSector = fsuoff[0];
    713 		endSector = fsuend[0];
    714 
    715 		/* find out the column of failed disk being accessed */
    716 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    717 
    718 		/* find out the other failed colume not accessed */
    719 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    720 		for (i = 0; i < numDataCol; i++) {
    721 			npda.raidAddress = sosAddr + (i * secPerSU);
    722 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    723 			/* skip over dead disks */
    724 			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    725 				if (i != fcol[0])
    726 					break;
    727 		}
    728 		RF_ASSERT(i < numDataCol);
    729 		fcol[1] = i;
    730 	} else {
    731 		RF_ASSERT(nresults == 2);
    732 		pda0 = node->results[0];
    733 		bzero(pda0->bufPtr, bytesPerSector * pda0->numSector);
    734 		pda1 = node->results[1];
    735 		bzero(pda1->bufPtr, bytesPerSector * pda1->numSector);
    736 		/* determine the failed colume numbers of the two failed
    737 		 * disks. */
    738 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    739 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    740 		/* determine the offset and end offset of the access within
    741 		 * each failed stripe unit. */
    742 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    743 		fsuend[0] = fsuoff[0] + pda0->numSector;
    744 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    745 		fsuend[1] = fsuoff[1] + pda1->numSector;
    746 		/* determine the startSector to begin decoding */
    747 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    748 		/* determine the endSector to end decoding */
    749 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    750 	}
    751 	/*
    752 	      assign the beginning sector and the end sector for each parameter
    753 	      find out the corresponding colume # for each parameter
    754         */
    755 	for (prm = 0; prm < ndataParam; prm++) {
    756 		pda = node->params[prm].p;
    757 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    758 		suend[prm] = suoff[prm] + pda->numSector;
    759 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    760 	}
    761 	/* 'sector' is the sector for the current decoding algorithm. For each
    762 	 * sector in the failed SU, find out the corresponding parameters that
    763 	 * cover the current sector and that are needed for decoding of this
    764 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    765 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    766 	 * size. */
    767 	for (sector = startSector; sector < endSector; sector++) {
    768 		if (nresults == 2)
    769 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    770 				continue;
    771 		for (prm = 0; prm < ndataParam; prm++)
    772 			if (suoff[prm] <= sector && sector < suend[prm])
    773 				buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    774 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    775 		/* find out if sector is in the shadow of any accessed failed
    776 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    777 		 * position of the buffer corresponding to failed SUs. if no,
    778 		 * malloc a temporary space of a sector in size for
    779 		 * destination of decoding. */
    780 		RF_ASSERT(nresults == 1 || nresults == 2);
    781 		if (nresults == 1) {
    782 			dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    783 			/* Always malloc temp buffer to dest[1]  */
    784 			RF_Malloc(dest[1], bytesPerSector, (char *));
    785 			bzero(dest[1], bytesPerSector);
    786 			mallc_two = 1;
    787 		} else {
    788 			if (fsuoff[0] <= sector && sector < fsuend[0])
    789 				dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    790 			else {
    791 				RF_Malloc(dest[0], bytesPerSector, (char *));
    792 				bzero(dest[0], bytesPerSector);
    793 				mallc_one = 1;
    794 			}
    795 			if (fsuoff[1] <= sector && sector < fsuend[1])
    796 				dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    797 			else {
    798 				RF_Malloc(dest[1], bytesPerSector, (char *));
    799 				bzero(dest[1], bytesPerSector);
    800 				mallc_two = 1;
    801 			}
    802 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    803 		}
    804 		pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    805 		ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    806 		/*
    807 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    808 	         * one sector to destination.
    809 	         */
    810 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    811 		/* free all allocated memory, and mark flag to indicate no
    812 		 * memory is being allocated */
    813 		if (mallc_one == 1)
    814 			RF_Free(dest[0], bytesPerSector);
    815 		if (mallc_two == 1)
    816 			RF_Free(dest[1], bytesPerSector);
    817 		mallc_one = mallc_two = 0;
    818 	}
    819 	RF_Free(buf, numDataCol * sizeof(char *));
    820 	if (ndataParam != 0) {
    821 		RF_Free(suoff, ndataParam * sizeof(long));
    822 		RF_Free(suend, ndataParam * sizeof(long));
    823 		RF_Free(prmToCol, ndataParam * sizeof(long));
    824 	}
    825 	RF_ETIMER_STOP(timer);
    826 	RF_ETIMER_EVAL(timer);
    827 	if (tracerec) {
    828 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    829 	}
    830 	rf_GenericWakeupFunc(node, 0);
    831 #if 1
    832 	return (0);		/* XXX is this even close!!?!?!!? GO */
    833 #endif
    834 }
    835 
    836 
    837 /* currently, only access of one of the two failed SU is allowed in this function.
    838  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    839  * many accesses of single stripe unit.
    840  */
    841 
    842 int
    843 rf_EOWriteDoubleRecoveryFunc(node)
    844 	RF_DagNode_t *node;
    845 {
    846 	int     np = node->numParams;
    847 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    848 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    849 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    850 	RF_SectorNum_t sector;
    851 	RF_RowCol_t col, scol;
    852 	int     prm, i, j;
    853 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    854 	unsigned sosAddr;
    855 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    856 	RF_int64 numbytes;
    857 	RF_SectorNum_t startSector, endSector;
    858 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    859 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    860 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    861 				 * buffer storing data read from col0, col1,
    862 				 * col2 */
    863 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    864 	RF_Etimer_t timer;
    865 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    866 
    867 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    868 						 * case, the other failed SU
    869 						 * is not being accessed */
    870 	RF_ETIMER_START(timer);
    871 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    872 
    873 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    874 				 * and [1] are Ppda and Epda  */
    875 	epda = node->results[1];
    876 	fpda = asmap->failedPDAs[0];
    877 
    878 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    879 	/* determine the startSector and endSector for decoding */
    880 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    881 	endSector = startSector + fpda->numSector;
    882 	/* Assign buf[col] pointers to point to each non-failed colume  and
    883 	 * initialize the pbuf and ebuf to point at the beginning of each
    884 	 * source buffers and destination buffers */
    885 	for (prm = 0; prm < numDataCol - 2; prm++) {
    886 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    887 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    888 		buf[col] = pda->bufPtr;
    889 	}
    890 	/* pbuf and ebuf:  they will change values as double recovery decoding
    891 	 * goes on */
    892 	pbuf = ppda->bufPtr;
    893 	ebuf = epda->bufPtr;
    894 	/* find out the logical colume numbers in the encoding matrix of the
    895 	 * two failed columes */
    896 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    897 
    898 	/* find out the other failed colume not accessed this time */
    899 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    900 	for (i = 0; i < numDataCol; i++) {
    901 		npda.raidAddress = sosAddr + (i * secPerSU);
    902 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    903 		/* skip over dead disks */
    904 		if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    905 			if (i != fcol[0])
    906 				break;
    907 	}
    908 	RF_ASSERT(i < numDataCol);
    909 	fcol[1] = i;
    910 	/* assign temporary space to put recovered failed SU */
    911 	numbytes = fpda->numSector * bytesPerSector;
    912 	RF_Malloc(olddata[0], numbytes, (char *));
    913 	RF_Malloc(olddata[1], numbytes, (char *));
    914 	dest[0] = olddata[0];
    915 	dest[1] = olddata[1];
    916 	bzero(olddata[0], numbytes);
    917 	bzero(olddata[1], numbytes);
    918 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    919 	 * have already pointed at the beginning of each source buffers and
    920 	 * destination buffers */
    921 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    922 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    923 		for (j = 0; j < numDataCol; j++)
    924 			if ((j != fcol[0]) && (j != fcol[1]))
    925 				buf[j] += bytesPerSector;
    926 		dest[0] += bytesPerSector;
    927 		dest[1] += bytesPerSector;
    928 		ebuf += bytesPerSector;
    929 		pbuf += bytesPerSector;
    930 	}
    931 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    932 	 * data. With new writing data and this old data, use small write to
    933 	 * calculate the new redundant informations */
    934 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    935 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    936 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    937 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    938 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    939 	 * wudNodes; For current implementation, we assume the simplest case:
    940 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    941 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    942 	 * data to be writen to the failed disk. We first bxor the new data
    943 	 * into the old recovered data, then do the same things as small
    944 	 * write. */
    945 
    946 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
    947 	/* do new 'E' calculation  */
    948 	/* find out the corresponding colume in encoding matrix for write
    949 	 * colume to be encoded into redundant disk 'E' */
    950 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    951 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    952 	 * buffer pointer               */
    953 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    954 
    955 	/* do new 'P' calculation  */
    956 	rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
    957 	/* Free the allocated buffer  */
    958 	RF_Free(olddata[0], numbytes);
    959 	RF_Free(olddata[1], numbytes);
    960 	RF_Free(buf, numDataCol * sizeof(char *));
    961 
    962 	RF_ETIMER_STOP(timer);
    963 	RF_ETIMER_EVAL(timer);
    964 	if (tracerec) {
    965 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    966 	}
    967 	rf_GenericWakeupFunc(node, 0);
    968 	return (0);
    969 }
    970