Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.9
      1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.9 2001/09/01 23:50:44 thorpej Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: ChangMing Wu
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID-EVENODD  architecture.
     31  */
     32 
     33 #include "rf_archs.h"
     34 
     35 #if RF_INCLUDE_EVENODD > 0
     36 
     37 #include "rf_types.h"
     38 #include "rf_raid.h"
     39 #include "rf_dag.h"
     40 #include "rf_dagffrd.h"
     41 #include "rf_dagffwr.h"
     42 #include "rf_dagdegrd.h"
     43 #include "rf_dagdegwr.h"
     44 #include "rf_dagutils.h"
     45 #include "rf_dagfuncs.h"
     46 #include "rf_etimer.h"
     47 #include "rf_general.h"
     48 #include "rf_configure.h"
     49 #include "rf_parityscan.h"
     50 #include "rf_evenodd.h"
     51 #include "rf_evenodd_dagfuncs.h"
     52 
     53 /* These redundant functions are for small write */
     54 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     55 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     56 /* These redundant functions are for degraded read */
     57 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     58 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
     59 /**********************************************************************************************
     60  *   the following encoding node functions is used in  EO_000_CreateLargeWriteDAG
     61  **********************************************************************************************/
     62 int
     63 rf_RegularPEFunc(node)
     64 	RF_DagNode_t *node;
     65 {
     66 	rf_RegularESubroutine(node, node->results[1]);
     67 	rf_RegularXorFunc(node);/* does the wakeup here! */
     68 #if 1
     69 	return (0);		/* XXX This was missing... GO */
     70 #endif
     71 }
     72 
     73 
     74 /************************************************************************************************
     75  *  For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to
     76  *  be used. The previous case is when write access at least sectors of full stripe unit.
     77  *  The later function is used when the write access two stripe units but with total sectors
     78  *  less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected
     79  *  areas in their stripe unit and  parity write and 'E' write are both devided into two distinct
     80  *  writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5
     81  ************************************************************************************************/
     82 
     83 /* Algorithm:
     84      1. Store the difference of old data and new data in the Rod buffer.
     85      2. then encode this buffer into the buffer which already have old 'E' information inside it,
     86 	the result can be shown to be the new 'E' information.
     87      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
     88    Here we have another alternative: to allocate a temporary buffer for storing the difference of
     89    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
     90    take the same speed as the previous, and need more memory.
     91 */
     92 int
     93 rf_RegularONEFunc(node)
     94 	RF_DagNode_t *node;
     95 {
     96 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
     97 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
     98 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
     99 								 * where you can find
    100 								 * e-pda */
    101 	int     i, k, retcode = 0;
    102 	int     suoffset, length;
    103 	RF_RowCol_t scol;
    104 	char   *srcbuf, *destbuf;
    105 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    106 	RF_Etimer_t timer;
    107 	RF_PhysDiskAddr_t *pda;
    108 #ifdef RAID_DIAGNOSTIC
    109 	RF_PhysDiskAddr_t *EPDA =
    110 	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    111 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
    112 #endif /* RAID_DIAGNOSTIC */
    113 
    114 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    115 	RF_ASSERT(ESUOffset == 0);
    116 
    117 	RF_ETIMER_START(timer);
    118 
    119 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    120 	 * new data is stored in Rod buffer */
    121 	for (k = 0; k < EpdaIndex; k += 2) {
    122 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    123 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    124 	}
    125 	/* Start to encoding the buffer storing the difference of old data and
    126 	 * new data into 'E' buffer  */
    127 	for (i = 0; i < EpdaIndex; i += 2)
    128 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    129 									 * of E */
    130 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    131 			srcbuf = (char *) node->params[i + 1].p;
    132 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    133 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    134 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    135 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    136 		}
    137 	/* Recover the original old data to be used by parity encoding
    138 	 * function in XorNode */
    139 	for (k = 0; k < EpdaIndex; k += 2) {
    140 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    141 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    142 	}
    143 	RF_ETIMER_STOP(timer);
    144 	RF_ETIMER_EVAL(timer);
    145 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    146 	rf_GenericWakeupFunc(node, 0);
    147 #if 1
    148 	return (0);		/* XXX this was missing.. GO */
    149 #endif
    150 }
    151 
    152 int
    153 rf_SimpleONEFunc(node)
    154 	RF_DagNode_t *node;
    155 {
    156 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    157 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    158 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    159 	int     retcode = 0;
    160 	char   *srcbuf, *destbuf;
    161 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    162 	int     length;
    163 	RF_RowCol_t scol;
    164 	RF_Etimer_t timer;
    165 
    166 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    167 	if (node->dagHdr->status == rf_enable) {
    168 		RF_ETIMER_START(timer);
    169 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    170 														 * writeDataNodes */
    171 		/* bxor to buffer of readDataNodes */
    172 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    173 		/* find out the corresponding colume in encoding matrix for
    174 		 * write colume to be encoded into redundant disk 'E' */
    175 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    176 		srcbuf = node->params[1].p;
    177 		destbuf = node->params[3].p;
    178 		/* Start encoding process */
    179 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    180 		rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    181 		RF_ETIMER_STOP(timer);
    182 		RF_ETIMER_EVAL(timer);
    183 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    184 
    185 	}
    186 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    187 							 * explicitly since no
    188 							 * I/O in this node */
    189 }
    190 
    191 
    192 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    193 void
    194 rf_RegularESubroutine(node, ebuf)
    195 	RF_DagNode_t *node;
    196 	char   *ebuf;
    197 {
    198 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    199 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    200 	RF_PhysDiskAddr_t *pda;
    201 	int     i, suoffset;
    202 	RF_RowCol_t scol;
    203 	char   *srcbuf, *destbuf;
    204 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    205 	RF_Etimer_t timer;
    206 
    207 	RF_ETIMER_START(timer);
    208 	for (i = 0; i < node->numParams - 2; i += 2) {
    209 		RF_ASSERT(node->params[i + 1].p != ebuf);
    210 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    211 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    212 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    213 		srcbuf = (char *) node->params[i + 1].p;
    214 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    215 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    216 	}
    217 	RF_ETIMER_STOP(timer);
    218 	RF_ETIMER_EVAL(timer);
    219 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    220 }
    221 
    222 
    223 /*******************************************************************************************
    224  *			 Used in  EO_001_CreateLargeWriteDAG
    225  ******************************************************************************************/
    226 int
    227 rf_RegularEFunc(node)
    228 	RF_DagNode_t *node;
    229 {
    230 	rf_RegularESubroutine(node, node->results[0]);
    231 	rf_GenericWakeupFunc(node, 0);
    232 #if 1
    233 	return (0);		/* XXX this was missing?.. GO */
    234 #endif
    235 }
    236 /*******************************************************************************************
    237  * This degraded function allow only two case:
    238  *  1. when write access the full failed stripe unit, then the access can be more than
    239  *     one tripe units.
    240  *  2. when write access only part of the failed SU, we assume accesses of more than
    241  *     one stripe unit is not allowed so that the write can be dealt with like a
    242  *     large write.
    243  *  The following function is based on these assumptions. So except in the second case,
    244  *  it looks the same as a large write encodeing function. But this is not exactly the
    245  *  normal way for doing a degraded write, since raidframe have to break cases of access
    246  *  other than the above two into smaller accesses. We may have to change
    247  *  DegrESubroutin in the future.
    248  *******************************************************************************************/
    249 void
    250 rf_DegrESubroutine(node, ebuf)
    251 	RF_DagNode_t *node;
    252 	char   *ebuf;
    253 {
    254 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    255 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    256 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    257 	RF_PhysDiskAddr_t *pda;
    258 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    259 	RF_RowCol_t scol;
    260 	char   *srcbuf, *destbuf;
    261 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    262 	RF_Etimer_t timer;
    263 
    264 	RF_ETIMER_START(timer);
    265 	for (i = 0; i < node->numParams - 2; i += 2) {
    266 		RF_ASSERT(node->params[i + 1].p != ebuf);
    267 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    268 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    269 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    270 		srcbuf = (char *) node->params[i + 1].p;
    271 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    272 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    273 	}
    274 
    275 	RF_ETIMER_STOP(timer);
    276 	RF_ETIMER_EVAL(timer);
    277 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    278 }
    279 
    280 
    281 /**************************************************************************************
    282  * This function is used in case where one data disk failed and both redundant disks
    283  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    284  * failed in the stripe but not accessed at this time, then we should, instead, use
    285  * the rf_EOWriteDoubleRecoveryFunc().
    286  **************************************************************************************/
    287 int
    288 rf_Degraded_100_EOFunc(node)
    289 	RF_DagNode_t *node;
    290 {
    291 	rf_DegrESubroutine(node, node->results[1]);
    292 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    293 #if 1
    294 	return (0);		/* XXX this was missing... SHould these be
    295 				 * void functions??? GO */
    296 #endif
    297 }
    298 /**************************************************************************************
    299  * This function is to encode one sector in one of the data disks to the E disk.
    300  * However, in evenodd this function can also be used as decoding function to recover
    301  * data from dead disk in the case of parity failure and a single data failure.
    302  **************************************************************************************/
    303 void
    304 rf_e_EncOneSect(
    305     RF_RowCol_t srcLogicCol,
    306     char *srcSecbuf,
    307     RF_RowCol_t destLogicCol,
    308     char *destSecbuf,
    309     int bytesPerSector)
    310 {
    311 	int     S_index;	/* index of the EU in the src col which need
    312 				 * be Xored into all EUs in a dest sector */
    313 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    314 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    315 					 * the destination colume of encoding
    316 					 * matrix */
    317 	        indexInSrc;	/* row index of an encoding unit in the source
    318 				 * colume used for recovery */
    319 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    320 
    321 #if RF_EO_MATRIX_DIM > 17
    322 	int     shortsPerEU = bytesPerEU / sizeof(short);
    323 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    324 	short temp1;
    325 #elif RF_EO_MATRIX_DIM == 17
    326 	int     longsPerEU = bytesPerEU / sizeof(long);
    327 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    328 	long temp1;
    329 #endif
    330 
    331 #if RF_EO_MATRIX_DIM > 17
    332 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    333 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    334 #elif RF_EO_MATRIX_DIM == 17
    335 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    336 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    337 #endif
    338 
    339 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    340 #if RF_EO_MATRIX_DIM > 17
    341 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    342 #elif RF_EO_MATRIX_DIM == 17
    343 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    344 #endif
    345 
    346 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    347 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    348 
    349 #if RF_EO_MATRIX_DIM > 17
    350 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    351 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    352 		for (j = 0; j < shortsPerEU; j++) {
    353 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    354 			/* note: S_index won't be at the end row for any src
    355 			 * col! */
    356 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    357 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    358 			/* if indexInSrc is at the end row, ie.
    359 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    360 			else
    361 				destShortBuf[j] = temp1;
    362 		}
    363 
    364 #elif RF_EO_MATRIX_DIM == 17
    365 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    366 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    367 		for (j = 0; j < longsPerEU; j++) {
    368 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    369 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    370 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    371 			else
    372 				destLongBuf[j] = temp1;
    373 		}
    374 #endif
    375 	}
    376 }
    377 
    378 void
    379 rf_e_encToBuf(
    380     RF_Raid_t * raidPtr,
    381     RF_RowCol_t srcLogicCol,
    382     char *srcbuf,
    383     RF_RowCol_t destLogicCol,
    384     char *destbuf,
    385     int numSector)
    386 {
    387 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    388 
    389 	for (i = 0; i < numSector; i++) {
    390 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    391 		srcbuf += bytesPerSector;
    392 		destbuf += bytesPerSector;
    393 	}
    394 }
    395 /**************************************************************************************
    396  * when parity die and one data die, We use second redundant information, 'E',
    397  * to recover the data in dead disk. This function is used in the recovery node of
    398  * for EO_110_CreateReadDAG
    399  **************************************************************************************/
    400 int
    401 rf_RecoveryEFunc(node)
    402 	RF_DagNode_t *node;
    403 {
    404 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    405 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    406 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    407 	RF_RowCol_t scol,	/* source logical column */
    408 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    409 									 * failed SU */
    410 	int     i;
    411 	RF_PhysDiskAddr_t *pda;
    412 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    413 	char   *srcbuf, *destbuf;
    414 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    415 	RF_Etimer_t timer;
    416 
    417 	memset((char *) node->results[0], 0,
    418 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    419 	if (node->dagHdr->status == rf_enable) {
    420 		RF_ETIMER_START(timer);
    421 		for (i = 0; i < node->numParams - 2; i += 2)
    422 			if (node->params[i + 1].p != node->results[0]) {
    423 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    424 				if (i == node->numParams - 4)
    425 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    426 									 * redundant E */
    427 				else
    428 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    429 				srcbuf = (char *) node->params[i + 1].p;
    430 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    431 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    432 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    433 			}
    434 		RF_ETIMER_STOP(timer);
    435 		RF_ETIMER_EVAL(timer);
    436 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    437 	}
    438 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    439 }
    440 /**************************************************************************************
    441  * This function is used in the case where one data and the parity have filed.
    442  * (in EO_110_CreateWriteDAG )
    443  **************************************************************************************/
    444 int
    445 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    446 {
    447 	rf_DegrESubroutine(node, node->results[0]);
    448 	rf_GenericWakeupFunc(node, 0);
    449 #if 1
    450 	return (0);		/* XXX Yet another one!! GO */
    451 #endif
    452 }
    453 
    454 
    455 
    456 /**************************************************************************************
    457  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    458  **************************************************************************************/
    459 
    460 void
    461 rf_doubleEOdecode(
    462     RF_Raid_t * raidPtr,
    463     char **rrdbuf,
    464     char **dest,
    465     RF_RowCol_t * fcol,
    466     char *pbuf,
    467     char *ebuf)
    468 {
    469 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    470 	int     i, j, k, f1, f2, row;
    471 	int     rrdrow, erow, count = 0;
    472 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    473 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    474 #if 0
    475 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    476 #endif
    477 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    478 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    479 	int     numDataCol = layoutPtr->numDataCol;
    480 #if RF_EO_MATRIX_DIM > 17
    481 	int     shortsPerEU = bytesPerEU / sizeof(short);
    482 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    483 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    484 	short *temp;
    485 	short  *P;
    486 
    487 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    488 	RF_Malloc(P, bytesPerEU, (short *));
    489 	RF_Malloc(temp, bytesPerEU, (short *));
    490 #elif RF_EO_MATRIX_DIM == 17
    491 	int     longsPerEU = bytesPerEU / sizeof(long);
    492 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    493 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    494 	long *temp;
    495 	long   *P;
    496 
    497 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    498 	RF_Malloc(P, bytesPerEU, (long *));
    499 	RF_Malloc(temp, bytesPerEU, (long *));
    500 #endif
    501 	RF_ASSERT(*((long *) dest[0]) == 0);
    502 	RF_ASSERT(*((long *) dest[1]) == 0);
    503 	memset((char *) P, 0, bytesPerEU);
    504 	memset((char *) temp, 0, bytesPerEU);
    505 	RF_ASSERT(*P == 0);
    506 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    507 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    508 	 * the Ref. paper by Blaum, et al 1993  */
    509 	for (i = 0; i < numRowInEncMatix; i++)
    510 		for (k = 0; k < longsPerEU; k++) {
    511 #if RF_EO_MATRIX_DIM > 17
    512 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    513 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    514 #elif RF_EO_MATRIX_DIM == 17
    515 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    516 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    517 #endif
    518 			P[k] ^= *ebuf_current;
    519 			P[k] ^= *pbuf_current;
    520 		}
    521 	RF_ASSERT(fcol[0] != fcol[1]);
    522 	if (fcol[0] < fcol[1]) {
    523 #if RF_EO_MATRIX_DIM > 17
    524 		dest_smaller = (short *) (dest[0]);
    525 		dest_larger = (short *) (dest[1]);
    526 #elif RF_EO_MATRIX_DIM == 17
    527 		dest_smaller = (long *) (dest[0]);
    528 		dest_larger = (long *) (dest[1]);
    529 #endif
    530 		f1 = fcol[0];
    531 		f2 = fcol[1];
    532 	} else {
    533 #if RF_EO_MATRIX_DIM > 17
    534 		dest_smaller = (short *) (dest[1]);
    535 		dest_larger = (short *) (dest[0]);
    536 #elif RF_EO_MATRIX_DIM == 17
    537 		dest_smaller = (long *) (dest[1]);
    538 		dest_larger = (long *) (dest[0]);
    539 #endif
    540 		f1 = fcol[1];
    541 		f2 = fcol[0];
    542 	}
    543 	row = (RF_EO_MATRIX_DIM) - 1;
    544 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    545 #if RF_EO_MATRIX_DIM > 17
    546 		dest_larger_current = dest_larger + row * shortsPerEU;
    547 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    548 #elif RF_EO_MATRIX_DIM == 17
    549 		dest_larger_current = dest_larger + row * longsPerEU;
    550 		dest_smaller_current = dest_smaller + row * longsPerEU;
    551 #endif
    552 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    553 		       which is the failed data in the colume which has smaller col index. **/
    554 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    555 		for (j = 0; j < numDataCol; j++) {
    556 			if (j == f1 || j == f2)
    557 				continue;
    558 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    559 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    560 #if RF_EO_MATRIX_DIM > 17
    561 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    562 				for (k = 0; k < shortsPerEU; k++)
    563 					temp[k] ^= *(rrdbuf_current + k);
    564 #elif RF_EO_MATRIX_DIM == 17
    565 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    566 				for (k = 0; k < longsPerEU; k++)
    567 					temp[k] ^= *(rrdbuf_current + k);
    568 #endif
    569 			}
    570 		}
    571 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    572 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    573 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    574 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    575 		 * diagonal) ^ (failed 2)       */
    576 
    577 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    578 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    579 #if RF_EO_MATRIX_DIM > 17
    580 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    581 			for (k = 0; k < shortsPerEU; k++)
    582 				temp[k] ^= *(ebuf_current + k);
    583 #elif RF_EO_MATRIX_DIM == 17
    584 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    585 			for (k = 0; k < longsPerEU; k++)
    586 				temp[k] ^= *(ebuf_current + k);
    587 #endif
    588 		}
    589 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    590 		 * proved to be actually  (principle diagonal)  After this
    591 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    592 #if RF_EO_MATRIX_DIM > 17
    593 		for (k = 0; k < shortsPerEU; k++)
    594 			temp[k] ^= P[k];
    595 		/* Put the data to the destination buffer                              */
    596 		for (k = 0; k < shortsPerEU; k++)
    597 			dest_larger_current[k] = temp[k];
    598 #elif RF_EO_MATRIX_DIM == 17
    599 		for (k = 0; k < longsPerEU; k++)
    600 			temp[k] ^= P[k];
    601 		/* Put the data to the destination buffer                              */
    602 		for (k = 0; k < longsPerEU; k++)
    603 			dest_larger_current[k] = temp[k];
    604 #endif
    605 
    606 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    607 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    608 		 * columes    */
    609 		for (j = 0; j < numDataCol; j++) {
    610 			if (j == f1 || j == f2)
    611 				continue;
    612 #if RF_EO_MATRIX_DIM > 17
    613 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    614 			for (k = 0; k < shortsPerEU; k++)
    615 				temp[k] ^= *(rrdbuf_current + k);
    616 #elif RF_EO_MATRIX_DIM == 17
    617 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    618 			for (k = 0; k < longsPerEU; k++)
    619 				temp[k] ^= *(rrdbuf_current + k);
    620 #endif
    621 		}
    622 		/* step 2: ^A(row,m-1) */
    623 		/* step 3: Put the data to the destination buffer                             	 */
    624 #if RF_EO_MATRIX_DIM > 17
    625 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    626 		for (k = 0; k < shortsPerEU; k++)
    627 			temp[k] ^= *(pbuf_current + k);
    628 		for (k = 0; k < shortsPerEU; k++)
    629 			dest_smaller_current[k] = temp[k];
    630 #elif RF_EO_MATRIX_DIM == 17
    631 		pbuf_current = (long *) pbuf + longsPerEU * row;
    632 		for (k = 0; k < longsPerEU; k++)
    633 			temp[k] ^= *(pbuf_current + k);
    634 		for (k = 0; k < longsPerEU; k++)
    635 			dest_smaller_current[k] = temp[k];
    636 #endif
    637 		count++;
    638 	}
    639 	/* Check if all Encoding Unit in the data buffer have been decoded,
    640 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    641 	 * this algorithm will covered all buffer 				 */
    642 	RF_ASSERT(count == numRowInEncMatix);
    643 	RF_Free((char *) P, bytesPerEU);
    644 	RF_Free((char *) temp, bytesPerEU);
    645 }
    646 
    647 
    648 /***************************************************************************************
* 	This function is called by double degraded read
    650 * 	EO_200_CreateReadDAG
    651 *
    652 ***************************************************************************************/
    653 int
    654 rf_EvenOddDoubleRecoveryFunc(node)
    655 	RF_DagNode_t *node;
    656 {
    657 	int     ndataParam = 0;
    658 	int     np = node->numParams;
    659 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    660 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    661 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    662 	int     i, prm, sector, nresults = node->numResults;
    663 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    664 	unsigned sosAddr;
    665 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    666 							 * memory is allocated */
    667 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    668 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    669 	        npda;
    670 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    671 	char  **buf, *ebuf, *pbuf, *dest[2];
    672 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
    673 	RF_SectorNum_t startSector, endSector;
    674 	RF_Etimer_t timer;
    675 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    676 
    677 	RF_ETIMER_START(timer);
    678 
    679 	/* Find out the number of parameters which are pdas for data
    680 	 * information */
    681 	for (i = 0; i <= np; i++)
    682 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    683 			ndataParam = i;
    684 			break;
    685 		}
    686 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    687 	if (ndataParam != 0) {
    688 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
    689 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
    690 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
    691 	}
    692 	if (asmap->failedPDAs[1] &&
    693 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    694 		RF_ASSERT(0);	/* currently, no support for this situation */
    695 		ppda = node->params[np - 6].p;
    696 		ppda2 = node->params[np - 5].p;
    697 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    698 		epda = node->params[np - 4].p;
    699 		epda2 = node->params[np - 3].p;
    700 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    701 		two = 1;
    702 	} else {
    703 		ppda = node->params[np - 4].p;
    704 		epda = node->params[np - 3].p;
    705 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    706 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    707 		RF_ASSERT(psuoff == esuoff);
    708 	}
    709 	/*
    710             the followings have three goals:
    711             1. determine the startSector to begin decoding and endSector to end decoding.
    712             2. determine the colume numbers of the two failed disks.
    713             3. determine the offset and end offset of the access within each failed stripe unit.
    714          */
    715 	if (nresults == 1) {
    716 		/* find the startSector to begin decoding */
    717 		pda = node->results[0];
    718 		memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
    719 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    720 		fsuend[0] = fsuoff[0] + pda->numSector;
    721 		startSector = fsuoff[0];
    722 		endSector = fsuend[0];
    723 
    724 		/* find out the column of failed disk being accessed */
    725 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    726 
    727 		/* find out the other failed colume not accessed */
    728 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    729 		for (i = 0; i < numDataCol; i++) {
    730 			npda.raidAddress = sosAddr + (i * secPerSU);
    731 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    732 			/* skip over dead disks */
    733 			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    734 				if (i != fcol[0])
    735 					break;
    736 		}
    737 		RF_ASSERT(i < numDataCol);
    738 		fcol[1] = i;
    739 	} else {
    740 		RF_ASSERT(nresults == 2);
    741 		pda0 = node->results[0];
    742 		memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
    743 		pda1 = node->results[1];
    744 		memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
    745 		/* determine the failed colume numbers of the two failed
    746 		 * disks. */
    747 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    748 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    749 		/* determine the offset and end offset of the access within
    750 		 * each failed stripe unit. */
    751 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    752 		fsuend[0] = fsuoff[0] + pda0->numSector;
    753 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    754 		fsuend[1] = fsuoff[1] + pda1->numSector;
    755 		/* determine the startSector to begin decoding */
    756 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    757 		/* determine the endSector to end decoding */
    758 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    759 	}
    760 	/*
    761 	      assign the beginning sector and the end sector for each parameter
    762 	      find out the corresponding colume # for each parameter
    763         */
    764 	for (prm = 0; prm < ndataParam; prm++) {
    765 		pda = node->params[prm].p;
    766 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    767 		suend[prm] = suoff[prm] + pda->numSector;
    768 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    769 	}
    770 	/* 'sector' is the sector for the current decoding algorithm. For each
    771 	 * sector in the failed SU, find out the corresponding parameters that
    772 	 * cover the current sector and that are needed for decoding of this
    773 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    774 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    775 	 * size. */
    776 	for (sector = startSector; sector < endSector; sector++) {
    777 		if (nresults == 2)
    778 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    779 				continue;
    780 		for (prm = 0; prm < ndataParam; prm++)
    781 			if (suoff[prm] <= sector && sector < suend[prm])
    782 				buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    783 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    784 		/* find out if sector is in the shadow of any accessed failed
    785 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    786 		 * position of the buffer corresponding to failed SUs. if no,
    787 		 * malloc a temporary space of a sector in size for
    788 		 * destination of decoding. */
    789 		RF_ASSERT(nresults == 1 || nresults == 2);
    790 		if (nresults == 1) {
    791 			dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    792 			/* Always malloc temp buffer to dest[1]  */
    793 			RF_Malloc(dest[1], bytesPerSector, (char *));
    794 			memset(dest[1], 0, bytesPerSector);
    795 			mallc_two = 1;
    796 		} else {
    797 			if (fsuoff[0] <= sector && sector < fsuend[0])
    798 				dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    799 			else {
    800 				RF_Malloc(dest[0], bytesPerSector, (char *));
    801 				memset(dest[0], 0, bytesPerSector);
    802 				mallc_one = 1;
    803 			}
    804 			if (fsuoff[1] <= sector && sector < fsuend[1])
    805 				dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    806 			else {
    807 				RF_Malloc(dest[1], bytesPerSector, (char *));
    808 				memset(dest[1], 0, bytesPerSector);
    809 				mallc_two = 1;
    810 			}
    811 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    812 		}
    813 		pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    814 		ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    815 		/*
    816 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    817 	         * one sector to destination.
    818 	         */
    819 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    820 		/* free all allocated memory, and mark flag to indicate no
    821 		 * memory is being allocated */
    822 		if (mallc_one == 1)
    823 			RF_Free(dest[0], bytesPerSector);
    824 		if (mallc_two == 1)
    825 			RF_Free(dest[1], bytesPerSector);
    826 		mallc_one = mallc_two = 0;
    827 	}
    828 	RF_Free(buf, numDataCol * sizeof(char *));
    829 	if (ndataParam != 0) {
    830 		RF_Free(suoff, ndataParam * sizeof(long));
    831 		RF_Free(suend, ndataParam * sizeof(long));
    832 		RF_Free(prmToCol, ndataParam * sizeof(long));
    833 	}
    834 	RF_ETIMER_STOP(timer);
    835 	RF_ETIMER_EVAL(timer);
    836 	if (tracerec) {
    837 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    838 	}
    839 	rf_GenericWakeupFunc(node, 0);
    840 #if 1
    841 	return (0);		/* XXX is this even close!!?!?!!? GO */
    842 #endif
    843 }
    844 
    845 
    846 /* currently, only access of one of the two failed SU is allowed in this function.
    847  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    848  * many accesses of single stripe unit.
    849  */
    850 
    851 int
    852 rf_EOWriteDoubleRecoveryFunc(node)
    853 	RF_DagNode_t *node;
    854 {
    855 	int     np = node->numParams;
    856 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    857 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    858 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    859 	RF_SectorNum_t sector;
    860 	RF_RowCol_t col, scol;
    861 	int     prm, i, j;
    862 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    863 	unsigned sosAddr;
    864 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    865 	RF_int64 numbytes;
    866 	RF_SectorNum_t startSector, endSector;
    867 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    868 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    869 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    870 				 * buffer storing data read from col0, col1,
    871 				 * col2 */
    872 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    873 	RF_Etimer_t timer;
    874 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    875 
    876 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    877 						 * case, the other failed SU
    878 						 * is not being accessed */
    879 	RF_ETIMER_START(timer);
    880 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    881 
    882 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    883 				 * and [1] are Ppda and Epda  */
    884 	epda = node->results[1];
    885 	fpda = asmap->failedPDAs[0];
    886 
    887 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    888 	/* determine the startSector and endSector for decoding */
    889 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    890 	endSector = startSector + fpda->numSector;
    891 	/* Assign buf[col] pointers to point to each non-failed colume  and
    892 	 * initialize the pbuf and ebuf to point at the beginning of each
    893 	 * source buffers and destination buffers */
    894 	for (prm = 0; prm < numDataCol - 2; prm++) {
    895 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    896 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    897 		buf[col] = pda->bufPtr;
    898 	}
    899 	/* pbuf and ebuf:  they will change values as double recovery decoding
    900 	 * goes on */
    901 	pbuf = ppda->bufPtr;
    902 	ebuf = epda->bufPtr;
    903 	/* find out the logical colume numbers in the encoding matrix of the
    904 	 * two failed columes */
    905 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    906 
    907 	/* find out the other failed colume not accessed this time */
    908 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    909 	for (i = 0; i < numDataCol; i++) {
    910 		npda.raidAddress = sosAddr + (i * secPerSU);
    911 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    912 		/* skip over dead disks */
    913 		if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    914 			if (i != fcol[0])
    915 				break;
    916 	}
    917 	RF_ASSERT(i < numDataCol);
    918 	fcol[1] = i;
    919 	/* assign temporary space to put recovered failed SU */
    920 	numbytes = fpda->numSector * bytesPerSector;
    921 	RF_Malloc(olddata[0], numbytes, (char *));
    922 	RF_Malloc(olddata[1], numbytes, (char *));
    923 	dest[0] = olddata[0];
    924 	dest[1] = olddata[1];
    925 	memset(olddata[0], 0, numbytes);
    926 	memset(olddata[1], 0, numbytes);
    927 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    928 	 * have already pointed at the beginning of each source buffers and
    929 	 * destination buffers */
    930 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    931 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    932 		for (j = 0; j < numDataCol; j++)
    933 			if ((j != fcol[0]) && (j != fcol[1]))
    934 				buf[j] += bytesPerSector;
    935 		dest[0] += bytesPerSector;
    936 		dest[1] += bytesPerSector;
    937 		ebuf += bytesPerSector;
    938 		pbuf += bytesPerSector;
    939 	}
    940 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    941 	 * data. With new writing data and this old data, use small write to
    942 	 * calculate the new redundant informations */
    943 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    944 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    945 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    946 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    947 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    948 	 * wudNodes; For current implementation, we assume the simplest case:
    949 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    950 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    951 	 * data to be writen to the failed disk. We first bxor the new data
    952 	 * into the old recovered data, then do the same things as small
    953 	 * write. */
    954 
    955 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
    956 	/* do new 'E' calculation  */
    957 	/* find out the corresponding colume in encoding matrix for write
    958 	 * colume to be encoded into redundant disk 'E' */
    959 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    960 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    961 	 * buffer pointer               */
    962 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    963 
    964 	/* do new 'P' calculation  */
    965 	rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
    966 	/* Free the allocated buffer  */
    967 	RF_Free(olddata[0], numbytes);
    968 	RF_Free(olddata[1], numbytes);
    969 	RF_Free(buf, numDataCol * sizeof(char *));
    970 
    971 	RF_ETIMER_STOP(timer);
    972 	RF_ETIMER_EVAL(timer);
    973 	if (tracerec) {
    974 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    975 	}
    976 	rf_GenericWakeupFunc(node, 0);
    977 	return (0);
    978 }
    979 #endif				/* RF_INCLUDE_EVENODD > 0 */
    980