Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.8
      1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.8 2001/07/18 06:45:33 thorpej Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: ChangMing Wu
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID-EVENODD  architecture.
     31  */
     32 
     33 #include "rf_archs.h"
     34 
     35 #if RF_INCLUDE_EVENODD > 0
     36 
     37 #include "rf_types.h"
     38 #include "rf_raid.h"
     39 #include "rf_dag.h"
     40 #include "rf_dagffrd.h"
     41 #include "rf_dagffwr.h"
     42 #include "rf_dagdegrd.h"
     43 #include "rf_dagdegwr.h"
     44 #include "rf_dagutils.h"
     45 #include "rf_dagfuncs.h"
     46 #include "rf_etimer.h"
     47 #include "rf_general.h"
     48 #include "rf_configure.h"
     49 #include "rf_parityscan.h"
     50 #include "rf_evenodd.h"
     51 #include "rf_evenodd_dagfuncs.h"
     52 
     53 /* These redundant functions are for small write */
     54 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     55 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     56 /* These redundant functions are for degraded read */
     57 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     58 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
     59 /**********************************************************************************************
     60  *   the following encoding node functions is used in  EO_000_CreateLargeWriteDAG
     61  **********************************************************************************************/
     62 int
     63 rf_RegularPEFunc(node)
     64 	RF_DagNode_t *node;
     65 {
     66 	rf_RegularESubroutine(node, node->results[1]);
     67 	rf_RegularXorFunc(node);/* does the wakeup here! */
     68 #if 1
     69 	return (0);		/* XXX This was missing... GO */
     70 #endif
     71 }
     72 
     73 
/************************************************************************************************
 *  For EO_001_CreateSmallWriteDAG there are two functions to choose from:
 *  (i) rf_RegularONEFunc() and (ii) rf_SimpleONEFunc().
 *  The former is used when the write accesses at least a full stripe unit's worth of sectors.
 *  The latter is used when the write touches two stripe units but the total number of sectors
 *  is less than the sectors per SU. In that case the accessed parts of parity and 'E' appear as
 *  disconnected areas within their stripe units, and the parity write and the 'E' write are
 *  each divided into two distinct writes (four in total). The simple old-new write and the
 *  regular old-new write happen as they do in RAID-5.
 ************************************************************************************************/
     82 
     83 /* Algorithm:
     84      1. Store the difference of old data and new data in the Rod buffer.
     85      2. then encode this buffer into the buffer which already have old 'E' information inside it,
     86 	the result can be shown to be the new 'E' information.
     87      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
     88    Here we have another alternative: to allocate a temporary buffer for storing the difference of
     89    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
     90    take the same speed as the previous, and need more memory.
     91 */
     92 int
     93 rf_RegularONEFunc(node)
     94 	RF_DagNode_t *node;
     95 {
     96 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
     97 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
     98 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
     99 								 * where you can find
    100 								 * e-pda */
    101 	int     i, k, retcode = 0;
    102 	int     suoffset, length;
    103 	RF_RowCol_t scol;
    104 	char   *srcbuf, *destbuf;
    105 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    106 	RF_Etimer_t timer;
    107 	RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    108 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);	/* generally zero  */
    109 
    110 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    111 	RF_ASSERT(ESUOffset == 0);
    112 
    113 	RF_ETIMER_START(timer);
    114 
    115 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    116 	 * new data is stored in Rod buffer */
    117 	for (k = 0; k < EpdaIndex; k += 2) {
    118 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    119 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    120 	}
    121 	/* Start to encoding the buffer storing the difference of old data and
    122 	 * new data into 'E' buffer  */
    123 	for (i = 0; i < EpdaIndex; i += 2)
    124 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    125 									 * of E */
    126 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    127 			srcbuf = (char *) node->params[i + 1].p;
    128 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    129 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    130 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    131 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    132 		}
    133 	/* Recover the original old data to be used by parity encoding
    134 	 * function in XorNode */
    135 	for (k = 0; k < EpdaIndex; k += 2) {
    136 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    137 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    138 	}
    139 	RF_ETIMER_STOP(timer);
    140 	RF_ETIMER_EVAL(timer);
    141 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    142 	rf_GenericWakeupFunc(node, 0);
    143 #if 1
    144 	return (0);		/* XXX this was missing.. GO */
    145 #endif
    146 }
    147 
    148 int
    149 rf_SimpleONEFunc(node)
    150 	RF_DagNode_t *node;
    151 {
    152 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    153 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    154 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    155 	int     retcode = 0;
    156 	char   *srcbuf, *destbuf;
    157 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    158 	int     length;
    159 	RF_RowCol_t scol;
    160 	RF_Etimer_t timer;
    161 
    162 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    163 	if (node->dagHdr->status == rf_enable) {
    164 		RF_ETIMER_START(timer);
    165 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    166 														 * writeDataNodes */
    167 		/* bxor to buffer of readDataNodes */
    168 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    169 		/* find out the corresponding colume in encoding matrix for
    170 		 * write colume to be encoded into redundant disk 'E' */
    171 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    172 		srcbuf = node->params[1].p;
    173 		destbuf = node->params[3].p;
    174 		/* Start encoding process */
    175 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    176 		rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    177 		RF_ETIMER_STOP(timer);
    178 		RF_ETIMER_EVAL(timer);
    179 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    180 
    181 	}
    182 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    183 							 * explicitly since no
    184 							 * I/O in this node */
    185 }
    186 
    187 
    188 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    189 void
    190 rf_RegularESubroutine(node, ebuf)
    191 	RF_DagNode_t *node;
    192 	char   *ebuf;
    193 {
    194 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    195 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    196 	RF_PhysDiskAddr_t *pda;
    197 	int     i, suoffset;
    198 	RF_RowCol_t scol;
    199 	char   *srcbuf, *destbuf;
    200 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    201 	RF_Etimer_t timer;
    202 
    203 	RF_ETIMER_START(timer);
    204 	for (i = 0; i < node->numParams - 2; i += 2) {
    205 		RF_ASSERT(node->params[i + 1].p != ebuf);
    206 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    207 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    208 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    209 		srcbuf = (char *) node->params[i + 1].p;
    210 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    211 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    212 	}
    213 	RF_ETIMER_STOP(timer);
    214 	RF_ETIMER_EVAL(timer);
    215 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    216 }
    217 
    218 
    219 /*******************************************************************************************
    220  *			 Used in  EO_001_CreateLargeWriteDAG
    221  ******************************************************************************************/
    222 int
    223 rf_RegularEFunc(node)
    224 	RF_DagNode_t *node;
    225 {
    226 	rf_RegularESubroutine(node, node->results[0]);
    227 	rf_GenericWakeupFunc(node, 0);
    228 #if 1
    229 	return (0);		/* XXX this was missing?.. GO */
    230 #endif
    231 }
    232 /*******************************************************************************************
    233  * This degraded function allow only two case:
    234  *  1. when write access the full failed stripe unit, then the access can be more than
    235  *     one tripe units.
    236  *  2. when write access only part of the failed SU, we assume accesses of more than
    237  *     one stripe unit is not allowed so that the write can be dealt with like a
    238  *     large write.
    239  *  The following function is based on these assumptions. So except in the second case,
    240  *  it looks the same as a large write encodeing function. But this is not exactly the
    241  *  normal way for doing a degraded write, since raidframe have to break cases of access
    242  *  other than the above two into smaller accesses. We may have to change
    243  *  DegrESubroutin in the future.
    244  *******************************************************************************************/
    245 void
    246 rf_DegrESubroutine(node, ebuf)
    247 	RF_DagNode_t *node;
    248 	char   *ebuf;
    249 {
    250 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    251 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    252 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    253 	RF_PhysDiskAddr_t *pda;
    254 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    255 	RF_RowCol_t scol;
    256 	char   *srcbuf, *destbuf;
    257 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    258 	RF_Etimer_t timer;
    259 
    260 	RF_ETIMER_START(timer);
    261 	for (i = 0; i < node->numParams - 2; i += 2) {
    262 		RF_ASSERT(node->params[i + 1].p != ebuf);
    263 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    264 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    265 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    266 		srcbuf = (char *) node->params[i + 1].p;
    267 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    268 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    269 	}
    270 
    271 	RF_ETIMER_STOP(timer);
    272 	RF_ETIMER_EVAL(timer);
    273 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    274 }
    275 
    276 
    277 /**************************************************************************************
    278  * This function is used in case where one data disk failed and both redundant disks
    279  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    280  * failed in the stripe but not accessed at this time, then we should, instead, use
    281  * the rf_EOWriteDoubleRecoveryFunc().
    282  **************************************************************************************/
    283 int
    284 rf_Degraded_100_EOFunc(node)
    285 	RF_DagNode_t *node;
    286 {
    287 	rf_DegrESubroutine(node, node->results[1]);
    288 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    289 #if 1
    290 	return (0);		/* XXX this was missing... SHould these be
    291 				 * void functions??? GO */
    292 #endif
    293 }
    294 /**************************************************************************************
    295  * This function is to encode one sector in one of the data disks to the E disk.
    296  * However, in evenodd this function can also be used as decoding function to recover
    297  * data from dead disk in the case of parity failure and a single data failure.
    298  **************************************************************************************/
    299 void
    300 rf_e_EncOneSect(
    301     RF_RowCol_t srcLogicCol,
    302     char *srcSecbuf,
    303     RF_RowCol_t destLogicCol,
    304     char *destSecbuf,
    305     int bytesPerSector)
    306 {
    307 	int     S_index;	/* index of the EU in the src col which need
    308 				 * be Xored into all EUs in a dest sector */
    309 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    310 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    311 					 * the destination colume of encoding
    312 					 * matrix */
    313 	        indexInSrc;	/* row index of an encoding unit in the source
    314 				 * colume used for recovery */
    315 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    316 
    317 #if RF_EO_MATRIX_DIM > 17
    318 	int     shortsPerEU = bytesPerEU / sizeof(short);
    319 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    320 	short temp1;
    321 #elif RF_EO_MATRIX_DIM == 17
    322 	int     longsPerEU = bytesPerEU / sizeof(long);
    323 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    324 	long temp1;
    325 #endif
    326 
    327 #if RF_EO_MATRIX_DIM > 17
    328 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    329 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    330 #elif RF_EO_MATRIX_DIM == 17
    331 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    332 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    333 #endif
    334 
    335 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    336 #if RF_EO_MATRIX_DIM > 17
    337 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    338 #elif RF_EO_MATRIX_DIM == 17
    339 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    340 #endif
    341 
    342 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    343 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    344 
    345 #if RF_EO_MATRIX_DIM > 17
    346 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    347 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    348 		for (j = 0; j < shortsPerEU; j++) {
    349 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    350 			/* note: S_index won't be at the end row for any src
    351 			 * col! */
    352 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    353 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    354 			/* if indexInSrc is at the end row, ie.
    355 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    356 			else
    357 				destShortBuf[j] = temp1;
    358 		}
    359 
    360 #elif RF_EO_MATRIX_DIM == 17
    361 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    362 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    363 		for (j = 0; j < longsPerEU; j++) {
    364 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    365 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    366 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    367 			else
    368 				destLongBuf[j] = temp1;
    369 		}
    370 #endif
    371 	}
    372 }
    373 
    374 void
    375 rf_e_encToBuf(
    376     RF_Raid_t * raidPtr,
    377     RF_RowCol_t srcLogicCol,
    378     char *srcbuf,
    379     RF_RowCol_t destLogicCol,
    380     char *destbuf,
    381     int numSector)
    382 {
    383 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    384 
    385 	for (i = 0; i < numSector; i++) {
    386 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    387 		srcbuf += bytesPerSector;
    388 		destbuf += bytesPerSector;
    389 	}
    390 }
    391 /**************************************************************************************
    392  * when parity die and one data die, We use second redundant information, 'E',
    393  * to recover the data in dead disk. This function is used in the recovery node of
    394  * for EO_110_CreateReadDAG
    395  **************************************************************************************/
    396 int
    397 rf_RecoveryEFunc(node)
    398 	RF_DagNode_t *node;
    399 {
    400 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    401 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    402 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    403 	RF_RowCol_t scol,	/* source logical column */
    404 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    405 									 * failed SU */
    406 	int     i;
    407 	RF_PhysDiskAddr_t *pda;
    408 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    409 	char   *srcbuf, *destbuf;
    410 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    411 	RF_Etimer_t timer;
    412 
    413 	memset((char *) node->results[0], 0,
    414 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    415 	if (node->dagHdr->status == rf_enable) {
    416 		RF_ETIMER_START(timer);
    417 		for (i = 0; i < node->numParams - 2; i += 2)
    418 			if (node->params[i + 1].p != node->results[0]) {
    419 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    420 				if (i == node->numParams - 4)
    421 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    422 									 * redundant E */
    423 				else
    424 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    425 				srcbuf = (char *) node->params[i + 1].p;
    426 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    427 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    428 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    429 			}
    430 		RF_ETIMER_STOP(timer);
    431 		RF_ETIMER_EVAL(timer);
    432 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    433 	}
    434 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    435 }
    436 /**************************************************************************************
    437  * This function is used in the case where one data and the parity have filed.
    438  * (in EO_110_CreateWriteDAG )
    439  **************************************************************************************/
    440 int
    441 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    442 {
    443 	rf_DegrESubroutine(node, node->results[0]);
    444 	rf_GenericWakeupFunc(node, 0);
    445 #if 1
    446 	return (0);		/* XXX Yet another one!! GO */
    447 #endif
    448 }
    449 
    450 
    451 
    452 /**************************************************************************************
    453  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    454  **************************************************************************************/
    455 
    456 void
    457 rf_doubleEOdecode(
    458     RF_Raid_t * raidPtr,
    459     char **rrdbuf,
    460     char **dest,
    461     RF_RowCol_t * fcol,
    462     char *pbuf,
    463     char *ebuf)
    464 {
    465 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    466 	int     i, j, k, f1, f2, row;
    467 	int     rrdrow, erow, count = 0;
    468 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    469 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    470 #if 0
    471 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    472 #endif
    473 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    474 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    475 	int     numDataCol = layoutPtr->numDataCol;
    476 #if RF_EO_MATRIX_DIM > 17
    477 	int     shortsPerEU = bytesPerEU / sizeof(short);
    478 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    479 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    480 	short *temp;
    481 	short  *P;
    482 
    483 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    484 	RF_Malloc(P, bytesPerEU, (short *));
    485 	RF_Malloc(temp, bytesPerEU, (short *));
    486 #elif RF_EO_MATRIX_DIM == 17
    487 	int     longsPerEU = bytesPerEU / sizeof(long);
    488 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    489 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    490 	long *temp;
    491 	long   *P;
    492 
    493 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    494 	RF_Malloc(P, bytesPerEU, (long *));
    495 	RF_Malloc(temp, bytesPerEU, (long *));
    496 #endif
    497 	RF_ASSERT(*((long *) dest[0]) == 0);
    498 	RF_ASSERT(*((long *) dest[1]) == 0);
    499 	memset((char *) P, 0, bytesPerEU);
    500 	memset((char *) temp, 0, bytesPerEU);
    501 	RF_ASSERT(*P == 0);
    502 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    503 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    504 	 * the Ref. paper by Blaum, et al 1993  */
    505 	for (i = 0; i < numRowInEncMatix; i++)
    506 		for (k = 0; k < longsPerEU; k++) {
    507 #if RF_EO_MATRIX_DIM > 17
    508 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    509 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    510 #elif RF_EO_MATRIX_DIM == 17
    511 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    512 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    513 #endif
    514 			P[k] ^= *ebuf_current;
    515 			P[k] ^= *pbuf_current;
    516 		}
    517 	RF_ASSERT(fcol[0] != fcol[1]);
    518 	if (fcol[0] < fcol[1]) {
    519 #if RF_EO_MATRIX_DIM > 17
    520 		dest_smaller = (short *) (dest[0]);
    521 		dest_larger = (short *) (dest[1]);
    522 #elif RF_EO_MATRIX_DIM == 17
    523 		dest_smaller = (long *) (dest[0]);
    524 		dest_larger = (long *) (dest[1]);
    525 #endif
    526 		f1 = fcol[0];
    527 		f2 = fcol[1];
    528 	} else {
    529 #if RF_EO_MATRIX_DIM > 17
    530 		dest_smaller = (short *) (dest[1]);
    531 		dest_larger = (short *) (dest[0]);
    532 #elif RF_EO_MATRIX_DIM == 17
    533 		dest_smaller = (long *) (dest[1]);
    534 		dest_larger = (long *) (dest[0]);
    535 #endif
    536 		f1 = fcol[1];
    537 		f2 = fcol[0];
    538 	}
    539 	row = (RF_EO_MATRIX_DIM) - 1;
    540 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    541 #if RF_EO_MATRIX_DIM > 17
    542 		dest_larger_current = dest_larger + row * shortsPerEU;
    543 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    544 #elif RF_EO_MATRIX_DIM == 17
    545 		dest_larger_current = dest_larger + row * longsPerEU;
    546 		dest_smaller_current = dest_smaller + row * longsPerEU;
    547 #endif
    548 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    549 		       which is the failed data in the colume which has smaller col index. **/
    550 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    551 		for (j = 0; j < numDataCol; j++) {
    552 			if (j == f1 || j == f2)
    553 				continue;
    554 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    555 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    556 #if RF_EO_MATRIX_DIM > 17
    557 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    558 				for (k = 0; k < shortsPerEU; k++)
    559 					temp[k] ^= *(rrdbuf_current + k);
    560 #elif RF_EO_MATRIX_DIM == 17
    561 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    562 				for (k = 0; k < longsPerEU; k++)
    563 					temp[k] ^= *(rrdbuf_current + k);
    564 #endif
    565 			}
    566 		}
    567 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    568 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    569 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    570 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    571 		 * diagonal) ^ (failed 2)       */
    572 
    573 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    574 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    575 #if RF_EO_MATRIX_DIM > 17
    576 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    577 			for (k = 0; k < shortsPerEU; k++)
    578 				temp[k] ^= *(ebuf_current + k);
    579 #elif RF_EO_MATRIX_DIM == 17
    580 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    581 			for (k = 0; k < longsPerEU; k++)
    582 				temp[k] ^= *(ebuf_current + k);
    583 #endif
    584 		}
    585 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    586 		 * proved to be actually  (principle diagonal)  After this
    587 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    588 #if RF_EO_MATRIX_DIM > 17
    589 		for (k = 0; k < shortsPerEU; k++)
    590 			temp[k] ^= P[k];
    591 		/* Put the data to the destination buffer                              */
    592 		for (k = 0; k < shortsPerEU; k++)
    593 			dest_larger_current[k] = temp[k];
    594 #elif RF_EO_MATRIX_DIM == 17
    595 		for (k = 0; k < longsPerEU; k++)
    596 			temp[k] ^= P[k];
    597 		/* Put the data to the destination buffer                              */
    598 		for (k = 0; k < longsPerEU; k++)
    599 			dest_larger_current[k] = temp[k];
    600 #endif
    601 
    602 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    603 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    604 		 * columes    */
    605 		for (j = 0; j < numDataCol; j++) {
    606 			if (j == f1 || j == f2)
    607 				continue;
    608 #if RF_EO_MATRIX_DIM > 17
    609 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    610 			for (k = 0; k < shortsPerEU; k++)
    611 				temp[k] ^= *(rrdbuf_current + k);
    612 #elif RF_EO_MATRIX_DIM == 17
    613 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    614 			for (k = 0; k < longsPerEU; k++)
    615 				temp[k] ^= *(rrdbuf_current + k);
    616 #endif
    617 		}
    618 		/* step 2: ^A(row,m-1) */
    619 		/* step 3: Put the data to the destination buffer                             	 */
    620 #if RF_EO_MATRIX_DIM > 17
    621 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    622 		for (k = 0; k < shortsPerEU; k++)
    623 			temp[k] ^= *(pbuf_current + k);
    624 		for (k = 0; k < shortsPerEU; k++)
    625 			dest_smaller_current[k] = temp[k];
    626 #elif RF_EO_MATRIX_DIM == 17
    627 		pbuf_current = (long *) pbuf + longsPerEU * row;
    628 		for (k = 0; k < longsPerEU; k++)
    629 			temp[k] ^= *(pbuf_current + k);
    630 		for (k = 0; k < longsPerEU; k++)
    631 			dest_smaller_current[k] = temp[k];
    632 #endif
    633 		count++;
    634 	}
    635 	/* Check if all Encoding Unit in the data buffer have been decoded,
    636 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    637 	 * this algorithm will covered all buffer 				 */
    638 	RF_ASSERT(count == numRowInEncMatix);
    639 	RF_Free((char *) P, bytesPerEU);
    640 	RF_Free((char *) temp, bytesPerEU);
    641 }
    642 
    643 
/***************************************************************************************
* 	This function is called by the double degraded read DAG,
* 	EO_200_CreateReadDAG.
*
***************************************************************************************/
    649 int
    650 rf_EvenOddDoubleRecoveryFunc(node)
    651 	RF_DagNode_t *node;
    652 {
    653 	int     ndataParam = 0;
    654 	int     np = node->numParams;
    655 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    656 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    657 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    658 	int     i, prm, sector, nresults = node->numResults;
    659 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    660 	unsigned sosAddr;
    661 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    662 							 * memory is allocated */
    663 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    664 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    665 	        npda;
    666 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    667 	char  **buf, *ebuf, *pbuf, *dest[2];
    668 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
    669 	RF_SectorNum_t startSector, endSector;
    670 	RF_Etimer_t timer;
    671 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    672 
    673 	RF_ETIMER_START(timer);
    674 
    675 	/* Find out the number of parameters which are pdas for data
    676 	 * information */
    677 	for (i = 0; i <= np; i++)
    678 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    679 			ndataParam = i;
    680 			break;
    681 		}
    682 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    683 	if (ndataParam != 0) {
    684 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
    685 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
    686 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
    687 	}
    688 	if (asmap->failedPDAs[1] &&
    689 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    690 		RF_ASSERT(0);	/* currently, no support for this situation */
    691 		ppda = node->params[np - 6].p;
    692 		ppda2 = node->params[np - 5].p;
    693 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    694 		epda = node->params[np - 4].p;
    695 		epda2 = node->params[np - 3].p;
    696 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    697 		two = 1;
    698 	} else {
    699 		ppda = node->params[np - 4].p;
    700 		epda = node->params[np - 3].p;
    701 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    702 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    703 		RF_ASSERT(psuoff == esuoff);
    704 	}
    705 	/*
    706             the followings have three goals:
    707             1. determine the startSector to begin decoding and endSector to end decoding.
    708             2. determine the colume numbers of the two failed disks.
    709             3. determine the offset and end offset of the access within each failed stripe unit.
    710          */
    711 	if (nresults == 1) {
    712 		/* find the startSector to begin decoding */
    713 		pda = node->results[0];
    714 		memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
    715 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    716 		fsuend[0] = fsuoff[0] + pda->numSector;
    717 		startSector = fsuoff[0];
    718 		endSector = fsuend[0];
    719 
    720 		/* find out the column of failed disk being accessed */
    721 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    722 
    723 		/* find out the other failed colume not accessed */
    724 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    725 		for (i = 0; i < numDataCol; i++) {
    726 			npda.raidAddress = sosAddr + (i * secPerSU);
    727 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    728 			/* skip over dead disks */
    729 			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    730 				if (i != fcol[0])
    731 					break;
    732 		}
    733 		RF_ASSERT(i < numDataCol);
    734 		fcol[1] = i;
    735 	} else {
    736 		RF_ASSERT(nresults == 2);
    737 		pda0 = node->results[0];
    738 		memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
    739 		pda1 = node->results[1];
    740 		memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
    741 		/* determine the failed colume numbers of the two failed
    742 		 * disks. */
    743 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    744 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    745 		/* determine the offset and end offset of the access within
    746 		 * each failed stripe unit. */
    747 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    748 		fsuend[0] = fsuoff[0] + pda0->numSector;
    749 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    750 		fsuend[1] = fsuoff[1] + pda1->numSector;
    751 		/* determine the startSector to begin decoding */
    752 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    753 		/* determine the endSector to end decoding */
    754 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    755 	}
    756 	/*
    757 	      assign the beginning sector and the end sector for each parameter
    758 	      find out the corresponding colume # for each parameter
    759         */
    760 	for (prm = 0; prm < ndataParam; prm++) {
    761 		pda = node->params[prm].p;
    762 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    763 		suend[prm] = suoff[prm] + pda->numSector;
    764 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    765 	}
    766 	/* 'sector' is the sector for the current decoding algorithm. For each
    767 	 * sector in the failed SU, find out the corresponding parameters that
    768 	 * cover the current sector and that are needed for decoding of this
    769 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    770 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    771 	 * size. */
    772 	for (sector = startSector; sector < endSector; sector++) {
    773 		if (nresults == 2)
    774 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    775 				continue;
    776 		for (prm = 0; prm < ndataParam; prm++)
    777 			if (suoff[prm] <= sector && sector < suend[prm])
    778 				buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    779 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    780 		/* find out if sector is in the shadow of any accessed failed
    781 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    782 		 * position of the buffer corresponding to failed SUs. if no,
    783 		 * malloc a temporary space of a sector in size for
    784 		 * destination of decoding. */
    785 		RF_ASSERT(nresults == 1 || nresults == 2);
    786 		if (nresults == 1) {
    787 			dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    788 			/* Always malloc temp buffer to dest[1]  */
    789 			RF_Malloc(dest[1], bytesPerSector, (char *));
    790 			memset(dest[1], 0, bytesPerSector);
    791 			mallc_two = 1;
    792 		} else {
    793 			if (fsuoff[0] <= sector && sector < fsuend[0])
    794 				dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    795 			else {
    796 				RF_Malloc(dest[0], bytesPerSector, (char *));
    797 				memset(dest[0], 0, bytesPerSector);
    798 				mallc_one = 1;
    799 			}
    800 			if (fsuoff[1] <= sector && sector < fsuend[1])
    801 				dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    802 			else {
    803 				RF_Malloc(dest[1], bytesPerSector, (char *));
    804 				memset(dest[1], 0, bytesPerSector);
    805 				mallc_two = 1;
    806 			}
    807 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    808 		}
    809 		pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    810 		ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    811 		/*
    812 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    813 	         * one sector to destination.
    814 	         */
    815 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    816 		/* free all allocated memory, and mark flag to indicate no
    817 		 * memory is being allocated */
    818 		if (mallc_one == 1)
    819 			RF_Free(dest[0], bytesPerSector);
    820 		if (mallc_two == 1)
    821 			RF_Free(dest[1], bytesPerSector);
    822 		mallc_one = mallc_two = 0;
    823 	}
    824 	RF_Free(buf, numDataCol * sizeof(char *));
    825 	if (ndataParam != 0) {
    826 		RF_Free(suoff, ndataParam * sizeof(long));
    827 		RF_Free(suend, ndataParam * sizeof(long));
    828 		RF_Free(prmToCol, ndataParam * sizeof(long));
    829 	}
    830 	RF_ETIMER_STOP(timer);
    831 	RF_ETIMER_EVAL(timer);
    832 	if (tracerec) {
    833 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    834 	}
    835 	rf_GenericWakeupFunc(node, 0);
    836 #if 1
    837 	return (0);		/* XXX is this even close!!?!?!!? GO */
    838 #endif
    839 }
    840 
    841 
    842 /* currently, only access of one of the two failed SU is allowed in this function.
    843  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    844  * many accesses of single stripe unit.
    845  */
    846 
    847 int
    848 rf_EOWriteDoubleRecoveryFunc(node)
    849 	RF_DagNode_t *node;
    850 {
    851 	int     np = node->numParams;
    852 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    853 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    854 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    855 	RF_SectorNum_t sector;
    856 	RF_RowCol_t col, scol;
    857 	int     prm, i, j;
    858 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    859 	unsigned sosAddr;
    860 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    861 	RF_int64 numbytes;
    862 	RF_SectorNum_t startSector, endSector;
    863 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    864 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    865 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    866 				 * buffer storing data read from col0, col1,
    867 				 * col2 */
    868 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    869 	RF_Etimer_t timer;
    870 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    871 
    872 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    873 						 * case, the other failed SU
    874 						 * is not being accessed */
    875 	RF_ETIMER_START(timer);
    876 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    877 
    878 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    879 				 * and [1] are Ppda and Epda  */
    880 	epda = node->results[1];
    881 	fpda = asmap->failedPDAs[0];
    882 
    883 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    884 	/* determine the startSector and endSector for decoding */
    885 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    886 	endSector = startSector + fpda->numSector;
    887 	/* Assign buf[col] pointers to point to each non-failed colume  and
    888 	 * initialize the pbuf and ebuf to point at the beginning of each
    889 	 * source buffers and destination buffers */
    890 	for (prm = 0; prm < numDataCol - 2; prm++) {
    891 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    892 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    893 		buf[col] = pda->bufPtr;
    894 	}
    895 	/* pbuf and ebuf:  they will change values as double recovery decoding
    896 	 * goes on */
    897 	pbuf = ppda->bufPtr;
    898 	ebuf = epda->bufPtr;
    899 	/* find out the logical colume numbers in the encoding matrix of the
    900 	 * two failed columes */
    901 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    902 
    903 	/* find out the other failed colume not accessed this time */
    904 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    905 	for (i = 0; i < numDataCol; i++) {
    906 		npda.raidAddress = sosAddr + (i * secPerSU);
    907 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    908 		/* skip over dead disks */
    909 		if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    910 			if (i != fcol[0])
    911 				break;
    912 	}
    913 	RF_ASSERT(i < numDataCol);
    914 	fcol[1] = i;
    915 	/* assign temporary space to put recovered failed SU */
    916 	numbytes = fpda->numSector * bytesPerSector;
    917 	RF_Malloc(olddata[0], numbytes, (char *));
    918 	RF_Malloc(olddata[1], numbytes, (char *));
    919 	dest[0] = olddata[0];
    920 	dest[1] = olddata[1];
    921 	memset(olddata[0], 0, numbytes);
    922 	memset(olddata[1], 0, numbytes);
    923 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    924 	 * have already pointed at the beginning of each source buffers and
    925 	 * destination buffers */
    926 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    927 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    928 		for (j = 0; j < numDataCol; j++)
    929 			if ((j != fcol[0]) && (j != fcol[1]))
    930 				buf[j] += bytesPerSector;
    931 		dest[0] += bytesPerSector;
    932 		dest[1] += bytesPerSector;
    933 		ebuf += bytesPerSector;
    934 		pbuf += bytesPerSector;
    935 	}
    936 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    937 	 * data. With new writing data and this old data, use small write to
    938 	 * calculate the new redundant informations */
    939 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    940 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    941 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    942 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    943 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    944 	 * wudNodes; For current implementation, we assume the simplest case:
    945 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    946 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    947 	 * data to be writen to the failed disk. We first bxor the new data
    948 	 * into the old recovered data, then do the same things as small
    949 	 * write. */
    950 
    951 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
    952 	/* do new 'E' calculation  */
    953 	/* find out the corresponding colume in encoding matrix for write
    954 	 * colume to be encoded into redundant disk 'E' */
    955 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    956 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    957 	 * buffer pointer               */
    958 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    959 
    960 	/* do new 'P' calculation  */
    961 	rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
    962 	/* Free the allocated buffer  */
    963 	RF_Free(olddata[0], numbytes);
    964 	RF_Free(olddata[1], numbytes);
    965 	RF_Free(buf, numDataCol * sizeof(char *));
    966 
    967 	RF_ETIMER_STOP(timer);
    968 	RF_ETIMER_EVAL(timer);
    969 	if (tracerec) {
    970 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    971 	}
    972 	rf_GenericWakeupFunc(node, 0);
    973 	return (0);
    974 }
    975 #endif				/* RF_INCLUDE_EVENODD > 0 */
    976