Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.3.2.2
      1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.3.2.2 2001/02/11 19:16:15 bouyer Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: ChangMing Wu
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID-EVENODD  architecture.
     31  */
     32 
     33 #include "rf_archs.h"
     34 
     35 #if RF_INCLUDE_EVENODD > 0
     36 
     37 #include "rf_types.h"
     38 #include "rf_raid.h"
     39 #include "rf_dag.h"
     40 #include "rf_dagffrd.h"
     41 #include "rf_dagffwr.h"
     42 #include "rf_dagdegrd.h"
     43 #include "rf_dagdegwr.h"
     44 #include "rf_dagutils.h"
     45 #include "rf_dagfuncs.h"
     46 #include "rf_etimer.h"
     47 #include "rf_general.h"
     48 #include "rf_configure.h"
     49 #include "rf_parityscan.h"
     50 #include "rf_evenodd.h"
     51 #include "rf_evenodd_dagfuncs.h"
     52 
     53 /* These redundant functions are for small write */
     54 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     55 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     56 /* These redundant functions are for degraded read */
     57 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     58 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
     59 /**********************************************************************************************
     60  *   The following encoding node function is used in EO_000_CreateLargeWriteDAG
     61  **********************************************************************************************/
     62 int
     63 rf_RegularPEFunc(node)
     64 	RF_DagNode_t *node;
     65 {
     66 	rf_RegularESubroutine(node, node->results[1]);
     67 	rf_RegularXorFunc(node);/* does the wakeup here! */
     68 #if 1
     69 	return (0);		/* XXX This was missing... GO */
     70 #endif
     71 }
     72 
     73 
     74 /************************************************************************************************
     75  *  For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and (ii) SimpleONEFunc() to
     76  *  be used. The former is used when the write accesses at least one full stripe unit's worth
     77  *  of sectors. The latter is used when the write accesses two stripe units but with total sectors
     78  *  less than sectors per SU. In this case, the accesses of parity and 'E' appear as disconnected
     79  *  areas in their stripe units, and the parity write and 'E' write are both divided into two
     80  *  distinct writes (four in total). The simple old-new write and regular old-new write happen as in RAID-5.
     81  ************************************************************************************************/
     82 
     83 /* Algorithm:
     84      1. Store the difference of old data and new data in the Rod buffer.
     85      2. then encode this buffer into the buffer which already have old 'E' information inside it,
     86 	the result can be shown to be the new 'E' information.
     87      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
     88    Here we have another alternative: to allocate a temporary buffer for storing the difference of
     89    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
     90    take the same speed as the previous, and need more memory.
     91 */
     92 int
     93 rf_RegularONEFunc(node)
     94 	RF_DagNode_t *node;
     95 {
     96 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
     97 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
     98 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
     99 								 * where you can find
    100 								 * e-pda */
    101 	int     i, k, retcode = 0;
    102 	int     suoffset, length;
    103 	RF_RowCol_t scol;
    104 	char   *srcbuf, *destbuf;
    105 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    106 	RF_Etimer_t timer;
    107 	RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    108 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);	/* generally zero  */
    109 
    110 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    111 	RF_ASSERT(ESUOffset == 0);
    112 
    113 	RF_ETIMER_START(timer);
    114 
    115 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    116 	 * new data is stored in Rod buffer */
    117 	for (k = 0; k < EpdaIndex; k += 2) {
    118 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    119 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    120 	}
    121 	/* Start to encoding the buffer storing the difference of old data and
    122 	 * new data into 'E' buffer  */
    123 	for (i = 0; i < EpdaIndex; i += 2)
    124 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    125 									 * of E */
    126 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    127 			srcbuf = (char *) node->params[i + 1].p;
    128 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    129 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    130 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    131 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    132 		}
    133 	/* Recover the original old data to be used by parity encoding
    134 	 * function in XorNode */
    135 	for (k = 0; k < EpdaIndex; k += 2) {
    136 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    137 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    138 	}
    139 	RF_ETIMER_STOP(timer);
    140 	RF_ETIMER_EVAL(timer);
    141 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    142 	rf_GenericWakeupFunc(node, 0);
    143 #if 1
    144 	return (0);		/* XXX this was missing.. GO */
    145 #endif
    146 }
    147 
    148 int
    149 rf_SimpleONEFunc(node)
    150 	RF_DagNode_t *node;
    151 {
    152 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    153 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    154 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    155 	int     retcode = 0;
    156 	char   *srcbuf, *destbuf;
    157 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    158 	int     length;
    159 	RF_RowCol_t scol;
    160 	RF_Etimer_t timer;
    161 
    162 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    163 	if (node->dagHdr->status == rf_enable) {
    164 		RF_ETIMER_START(timer);
    165 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    166 														 * writeDataNodes */
    167 		/* bxor to buffer of readDataNodes */
    168 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    169 		/* find out the corresponding colume in encoding matrix for
    170 		 * write colume to be encoded into redundant disk 'E' */
    171 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    172 		srcbuf = node->params[1].p;
    173 		destbuf = node->params[3].p;
    174 		/* Start encoding process */
    175 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    176 		rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    177 		RF_ETIMER_STOP(timer);
    178 		RF_ETIMER_EVAL(timer);
    179 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    180 
    181 	}
    182 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    183 							 * explicitly since no
    184 							 * I/O in this node */
    185 }
    186 
    187 
    188 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    189 void
    190 rf_RegularESubroutine(node, ebuf)
    191 	RF_DagNode_t *node;
    192 	char   *ebuf;
    193 {
    194 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    195 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    196 	RF_PhysDiskAddr_t *pda;
    197 	int     i, suoffset;
    198 	RF_RowCol_t scol;
    199 	char   *srcbuf, *destbuf;
    200 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    201 	RF_Etimer_t timer;
    202 
    203 	RF_ETIMER_START(timer);
    204 	for (i = 0; i < node->numParams - 2; i += 2) {
    205 		RF_ASSERT(node->params[i + 1].p != ebuf);
    206 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    207 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    208 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    209 		srcbuf = (char *) node->params[i + 1].p;
    210 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    211 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    212 	}
    213 	RF_ETIMER_STOP(timer);
    214 	RF_ETIMER_EVAL(timer);
    215 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    216 }
    217 
    218 
    219 /*******************************************************************************************
    220  *			 Used in  EO_001_CreateLargeWriteDAG
    221  ******************************************************************************************/
    222 int
    223 rf_RegularEFunc(node)
    224 	RF_DagNode_t *node;
    225 {
    226 	rf_RegularESubroutine(node, node->results[0]);
    227 	rf_GenericWakeupFunc(node, 0);
    228 #if 1
    229 	return (0);		/* XXX this was missing?.. GO */
    230 #endif
    231 }
    232 /*******************************************************************************************
    233  * This degraded function allows only two cases:
    234  *  1. when the write accesses the full failed stripe unit; the access can then span
    235  *     more than one stripe unit.
    236  *  2. when the write accesses only part of the failed SU; we assume accesses of more
    237  *     than one stripe unit are not allowed, so that the write can be dealt with like a
    238  *     large write.
    239  *  The following function is based on these assumptions. So except in the second case,
    240  *  it looks the same as a large write encoding function. But this is not exactly the
    241  *  normal way of doing a degraded write, since RAIDframe has to break accesses
    242  *  other than the above two cases into smaller accesses. We may have to change
    243  *  DegrESubroutine in the future.
    244  *******************************************************************************************/
    245 void
    246 rf_DegrESubroutine(node, ebuf)
    247 	RF_DagNode_t *node;
    248 	char   *ebuf;
    249 {
    250 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    251 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    252 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    253 	RF_PhysDiskAddr_t *pda;
    254 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    255 	RF_RowCol_t scol;
    256 	char   *srcbuf, *destbuf;
    257 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    258 	RF_Etimer_t timer;
    259 
    260 	RF_ETIMER_START(timer);
    261 	for (i = 0; i < node->numParams - 2; i += 2) {
    262 		RF_ASSERT(node->params[i + 1].p != ebuf);
    263 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    264 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    265 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    266 		srcbuf = (char *) node->params[i + 1].p;
    267 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    268 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    269 	}
    270 
    271 	RF_ETIMER_STOP(timer);
    272 	RF_ETIMER_EVAL(timer);
    273 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    274 }
    275 
    276 
    277 /**************************************************************************************
    278  * This function is used in case where one data disk failed and both redundant disks
    279  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    280  * failed in the stripe but not accessed at this time, then we should, instead, use
    281  * the rf_EOWriteDoubleRecoveryFunc().
    282  **************************************************************************************/
    283 int
    284 rf_Degraded_100_EOFunc(node)
    285 	RF_DagNode_t *node;
    286 {
    287 	rf_DegrESubroutine(node, node->results[1]);
    288 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    289 #if 1
    290 	return (0);		/* XXX this was missing... SHould these be
    291 				 * void functions??? GO */
    292 #endif
    293 }
    294 /**************************************************************************************
    295  * This function is to encode one sector in one of the data disks to the E disk.
    296  * However, in evenodd this function can also be used as decoding function to recover
    297  * data from dead disk in the case of parity failure and a single data failure.
    298  **************************************************************************************/
    299 void
    300 rf_e_EncOneSect(
    301     RF_RowCol_t srcLogicCol,
    302     char *srcSecbuf,
    303     RF_RowCol_t destLogicCol,
    304     char *destSecbuf,
    305     int bytesPerSector)
    306 {
    307 	int     S_index;	/* index of the EU in the src col which need
    308 				 * be Xored into all EUs in a dest sector */
    309 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    310 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    311 					 * the destination colume of encoding
    312 					 * matrix */
    313 	        indexInSrc;	/* row index of an encoding unit in the source
    314 				 * colume used for recovery */
    315 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    316 
    317 #if RF_EO_MATRIX_DIM > 17
    318 	int     shortsPerEU = bytesPerEU / sizeof(short);
    319 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    320 	short temp1;
    321 #elif RF_EO_MATRIX_DIM == 17
    322 	int     longsPerEU = bytesPerEU / sizeof(long);
    323 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    324 	long temp1;
    325 #endif
    326 
    327 #if RF_EO_MATRIX_DIM > 17
    328 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    329 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    330 #elif RF_EO_MATRIX_DIM == 17
    331 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    332 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    333 #endif
    334 
    335 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    336 #if RF_EO_MATRIX_DIM > 17
    337 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    338 #elif RF_EO_MATRIX_DIM == 17
    339 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    340 #endif
    341 
    342 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    343 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    344 
    345 #if RF_EO_MATRIX_DIM > 17
    346 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    347 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    348 		for (j = 0; j < shortsPerEU; j++) {
    349 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    350 			/* note: S_index won't be at the end row for any src
    351 			 * col! */
    352 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    353 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    354 			/* if indexInSrc is at the end row, ie.
    355 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    356 			else
    357 				destShortBuf[j] = temp1;
    358 		}
    359 
    360 #elif RF_EO_MATRIX_DIM == 17
    361 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    362 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    363 		for (j = 0; j < longsPerEU; j++) {
    364 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    365 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    366 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    367 			else
    368 				destLongBuf[j] = temp1;
    369 		}
    370 #endif
    371 	}
    372 }
    373 
    374 void
    375 rf_e_encToBuf(
    376     RF_Raid_t * raidPtr,
    377     RF_RowCol_t srcLogicCol,
    378     char *srcbuf,
    379     RF_RowCol_t destLogicCol,
    380     char *destbuf,
    381     int numSector)
    382 {
    383 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    384 
    385 	for (i = 0; i < numSector; i++) {
    386 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    387 		srcbuf += bytesPerSector;
    388 		destbuf += bytesPerSector;
    389 	}
    390 }
    391 /**************************************************************************************
    392  * When parity and one data disk have failed, we use the second redundant information,
    393  * 'E', to recover the data of the dead disk. This function is used in the recovery
    394  * node for EO_110_CreateReadDAG
    395  **************************************************************************************/
    396 int
    397 rf_RecoveryEFunc(node)
    398 	RF_DagNode_t *node;
    399 {
    400 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    401 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    402 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    403 	RF_RowCol_t scol,	/* source logical column */
    404 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    405 									 * failed SU */
    406 	int     i;
    407 	RF_PhysDiskAddr_t *pda;
    408 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    409 	char   *srcbuf, *destbuf;
    410 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    411 	RF_Etimer_t timer;
    412 
    413 	bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    414 	if (node->dagHdr->status == rf_enable) {
    415 		RF_ETIMER_START(timer);
    416 		for (i = 0; i < node->numParams - 2; i += 2)
    417 			if (node->params[i + 1].p != node->results[0]) {
    418 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    419 				if (i == node->numParams - 4)
    420 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    421 									 * redundant E */
    422 				else
    423 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    424 				srcbuf = (char *) node->params[i + 1].p;
    425 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    426 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    427 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    428 			}
    429 		RF_ETIMER_STOP(timer);
    430 		RF_ETIMER_EVAL(timer);
    431 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    432 	}
    433 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    434 }
    435 /**************************************************************************************
    436  * This function is used in the case where one data and the parity have failed.
    437  * (in EO_110_CreateWriteDAG )
    438  **************************************************************************************/
    439 int
    440 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    441 {
    442 	rf_DegrESubroutine(node, node->results[0]);
    443 	rf_GenericWakeupFunc(node, 0);
    444 #if 1
    445 	return (0);		/* XXX Yet another one!! GO */
    446 #endif
    447 }
    448 
    449 
    450 
    451 /**************************************************************************************
    452  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    453  **************************************************************************************/
    454 
    455 void
    456 rf_doubleEOdecode(
    457     RF_Raid_t * raidPtr,
    458     char **rrdbuf,
    459     char **dest,
    460     RF_RowCol_t * fcol,
    461     char *pbuf,
    462     char *ebuf)
    463 {
    464 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    465 	int     i, j, k, f1, f2, row;
    466 	int     rrdrow, erow, count = 0;
    467 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    468 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    469 #if 0
    470 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    471 #endif
    472 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    473 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    474 	int     numDataCol = layoutPtr->numDataCol;
    475 #if RF_EO_MATRIX_DIM > 17
    476 	int     shortsPerEU = bytesPerEU / sizeof(short);
    477 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    478 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    479 	short *temp;
    480 	short  *P;
    481 
    482 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    483 	RF_Malloc(P, bytesPerEU, (short *));
    484 	RF_Malloc(temp, bytesPerEU, (short *));
    485 #elif RF_EO_MATRIX_DIM == 17
    486 	int     longsPerEU = bytesPerEU / sizeof(long);
    487 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    488 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    489 	long *temp;
    490 	long   *P;
    491 
    492 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    493 	RF_Malloc(P, bytesPerEU, (long *));
    494 	RF_Malloc(temp, bytesPerEU, (long *));
    495 #endif
    496 	RF_ASSERT(*((long *) dest[0]) == 0);
    497 	RF_ASSERT(*((long *) dest[1]) == 0);
    498 	bzero((char *) P, bytesPerEU);
    499 	bzero((char *) temp, bytesPerEU);
    500 	RF_ASSERT(*P == 0);
    501 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    502 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    503 	 * the Ref. paper by Blaum, et al 1993  */
    504 	for (i = 0; i < numRowInEncMatix; i++)
    505 		for (k = 0; k < longsPerEU; k++) {
    506 #if RF_EO_MATRIX_DIM > 17
    507 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    508 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    509 #elif RF_EO_MATRIX_DIM == 17
    510 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    511 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    512 #endif
    513 			P[k] ^= *ebuf_current;
    514 			P[k] ^= *pbuf_current;
    515 		}
    516 	RF_ASSERT(fcol[0] != fcol[1]);
    517 	if (fcol[0] < fcol[1]) {
    518 #if RF_EO_MATRIX_DIM > 17
    519 		dest_smaller = (short *) (dest[0]);
    520 		dest_larger = (short *) (dest[1]);
    521 #elif RF_EO_MATRIX_DIM == 17
    522 		dest_smaller = (long *) (dest[0]);
    523 		dest_larger = (long *) (dest[1]);
    524 #endif
    525 		f1 = fcol[0];
    526 		f2 = fcol[1];
    527 	} else {
    528 #if RF_EO_MATRIX_DIM > 17
    529 		dest_smaller = (short *) (dest[1]);
    530 		dest_larger = (short *) (dest[0]);
    531 #elif RF_EO_MATRIX_DIM == 17
    532 		dest_smaller = (long *) (dest[1]);
    533 		dest_larger = (long *) (dest[0]);
    534 #endif
    535 		f1 = fcol[1];
    536 		f2 = fcol[0];
    537 	}
    538 	row = (RF_EO_MATRIX_DIM) - 1;
    539 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    540 #if RF_EO_MATRIX_DIM > 17
    541 		dest_larger_current = dest_larger + row * shortsPerEU;
    542 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    543 #elif RF_EO_MATRIX_DIM == 17
    544 		dest_larger_current = dest_larger + row * longsPerEU;
    545 		dest_smaller_current = dest_smaller + row * longsPerEU;
    546 #endif
    547 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    548 		       which is the failed data in the colume which has smaller col index. **/
    549 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    550 		for (j = 0; j < numDataCol; j++) {
    551 			if (j == f1 || j == f2)
    552 				continue;
    553 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    554 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    555 #if RF_EO_MATRIX_DIM > 17
    556 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    557 				for (k = 0; k < shortsPerEU; k++)
    558 					temp[k] ^= *(rrdbuf_current + k);
    559 #elif RF_EO_MATRIX_DIM == 17
    560 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    561 				for (k = 0; k < longsPerEU; k++)
    562 					temp[k] ^= *(rrdbuf_current + k);
    563 #endif
    564 			}
    565 		}
    566 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    567 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    568 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    569 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    570 		 * diagonal) ^ (failed 2)       */
    571 
    572 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    573 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    574 #if RF_EO_MATRIX_DIM > 17
    575 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    576 			for (k = 0; k < shortsPerEU; k++)
    577 				temp[k] ^= *(ebuf_current + k);
    578 #elif RF_EO_MATRIX_DIM == 17
    579 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    580 			for (k = 0; k < longsPerEU; k++)
    581 				temp[k] ^= *(ebuf_current + k);
    582 #endif
    583 		}
    584 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    585 		 * proved to be actually  (principle diagonal)  After this
    586 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    587 #if RF_EO_MATRIX_DIM > 17
    588 		for (k = 0; k < shortsPerEU; k++)
    589 			temp[k] ^= P[k];
    590 		/* Put the data to the destination buffer                              */
    591 		for (k = 0; k < shortsPerEU; k++)
    592 			dest_larger_current[k] = temp[k];
    593 #elif RF_EO_MATRIX_DIM == 17
    594 		for (k = 0; k < longsPerEU; k++)
    595 			temp[k] ^= P[k];
    596 		/* Put the data to the destination buffer                              */
    597 		for (k = 0; k < longsPerEU; k++)
    598 			dest_larger_current[k] = temp[k];
    599 #endif
    600 
    601 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    602 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    603 		 * columes    */
    604 		for (j = 0; j < numDataCol; j++) {
    605 			if (j == f1 || j == f2)
    606 				continue;
    607 #if RF_EO_MATRIX_DIM > 17
    608 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    609 			for (k = 0; k < shortsPerEU; k++)
    610 				temp[k] ^= *(rrdbuf_current + k);
    611 #elif RF_EO_MATRIX_DIM == 17
    612 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    613 			for (k = 0; k < longsPerEU; k++)
    614 				temp[k] ^= *(rrdbuf_current + k);
    615 #endif
    616 		}
    617 		/* step 2: ^A(row,m-1) */
    618 		/* step 3: Put the data to the destination buffer                             	 */
    619 #if RF_EO_MATRIX_DIM > 17
    620 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    621 		for (k = 0; k < shortsPerEU; k++)
    622 			temp[k] ^= *(pbuf_current + k);
    623 		for (k = 0; k < shortsPerEU; k++)
    624 			dest_smaller_current[k] = temp[k];
    625 #elif RF_EO_MATRIX_DIM == 17
    626 		pbuf_current = (long *) pbuf + longsPerEU * row;
    627 		for (k = 0; k < longsPerEU; k++)
    628 			temp[k] ^= *(pbuf_current + k);
    629 		for (k = 0; k < longsPerEU; k++)
    630 			dest_smaller_current[k] = temp[k];
    631 #endif
    632 		count++;
    633 	}
    634 	/* Check if all Encoding Unit in the data buffer have been decoded,
    635 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    636 	 * this algorithm will covered all buffer 				 */
    637 	RF_ASSERT(count == numRowInEncMatix);
    638 	RF_Free((char *) P, bytesPerEU);
    639 	RF_Free((char *) temp, bytesPerEU);
    640 }
    641 
    642 
    643 /***************************************************************************************
    644 * 	This function is called by double degraded read
    645 * 	EO_200_CreateReadDAG
    646 *
    647 ***************************************************************************************/
    648 int
    649 rf_EvenOddDoubleRecoveryFunc(node)
    650 	RF_DagNode_t *node;
    651 {
    652 	int     ndataParam = 0;
    653 	int     np = node->numParams;
    654 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    655 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    656 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    657 	int     i, prm, sector, nresults = node->numResults;
    658 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    659 	unsigned sosAddr;
    660 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    661 							 * memory is allocated */
    662 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    663 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    664 	        npda;
    665 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    666 	char  **buf, *ebuf, *pbuf, *dest[2];
    667 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
    668 	RF_SectorNum_t startSector, endSector;
    669 	RF_Etimer_t timer;
    670 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    671 
    672 	RF_ETIMER_START(timer);
    673 
    674 	/* Find out the number of parameters which are pdas for data
    675 	 * information */
    676 	for (i = 0; i <= np; i++)
    677 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    678 			ndataParam = i;
    679 			break;
    680 		}
    681 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    682 	if (ndataParam != 0) {
    683 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
    684 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
    685 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
    686 	}
    687 	if (asmap->failedPDAs[1] &&
    688 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    689 		RF_ASSERT(0);	/* currently, no support for this situation */
    690 		ppda = node->params[np - 6].p;
    691 		ppda2 = node->params[np - 5].p;
    692 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    693 		epda = node->params[np - 4].p;
    694 		epda2 = node->params[np - 3].p;
    695 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    696 		two = 1;
    697 	} else {
    698 		ppda = node->params[np - 4].p;
    699 		epda = node->params[np - 3].p;
    700 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    701 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    702 		RF_ASSERT(psuoff == esuoff);
    703 	}
    704 	/*
    705             the followings have three goals:
    706             1. determine the startSector to begin decoding and endSector to end decoding.
    707             2. determine the colume numbers of the two failed disks.
    708             3. determine the offset and end offset of the access within each failed stripe unit.
    709          */
    710 	if (nresults == 1) {
    711 		/* find the startSector to begin decoding */
    712 		pda = node->results[0];
    713 		bzero(pda->bufPtr, bytesPerSector * pda->numSector);
    714 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    715 		fsuend[0] = fsuoff[0] + pda->numSector;
    716 		startSector = fsuoff[0];
    717 		endSector = fsuend[0];
    718 
    719 		/* find out the column of failed disk being accessed */
    720 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    721 
    722 		/* find out the other failed colume not accessed */
    723 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    724 		for (i = 0; i < numDataCol; i++) {
    725 			npda.raidAddress = sosAddr + (i * secPerSU);
    726 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    727 			/* skip over dead disks */
    728 			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    729 				if (i != fcol[0])
    730 					break;
    731 		}
    732 		RF_ASSERT(i < numDataCol);
    733 		fcol[1] = i;
    734 	} else {
    735 		RF_ASSERT(nresults == 2);
    736 		pda0 = node->results[0];
    737 		bzero(pda0->bufPtr, bytesPerSector * pda0->numSector);
    738 		pda1 = node->results[1];
    739 		bzero(pda1->bufPtr, bytesPerSector * pda1->numSector);
    740 		/* determine the failed colume numbers of the two failed
    741 		 * disks. */
    742 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    743 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    744 		/* determine the offset and end offset of the access within
    745 		 * each failed stripe unit. */
    746 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    747 		fsuend[0] = fsuoff[0] + pda0->numSector;
    748 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    749 		fsuend[1] = fsuoff[1] + pda1->numSector;
    750 		/* determine the startSector to begin decoding */
    751 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    752 		/* determine the endSector to end decoding */
    753 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    754 	}
    755 	/*
    756 	      assign the beginning sector and the end sector for each parameter
    757 	      find out the corresponding colume # for each parameter
    758         */
    759 	for (prm = 0; prm < ndataParam; prm++) {
    760 		pda = node->params[prm].p;
    761 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    762 		suend[prm] = suoff[prm] + pda->numSector;
    763 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    764 	}
    765 	/* 'sector' is the sector for the current decoding algorithm. For each
    766 	 * sector in the failed SU, find out the corresponding parameters that
    767 	 * cover the current sector and that are needed for decoding of this
    768 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    769 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    770 	 * size. */
    771 	for (sector = startSector; sector < endSector; sector++) {
    772 		if (nresults == 2)
    773 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    774 				continue;
    775 		for (prm = 0; prm < ndataParam; prm++)
    776 			if (suoff[prm] <= sector && sector < suend[prm])
    777 				buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    778 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    779 		/* find out if sector is in the shadow of any accessed failed
    780 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    781 		 * position of the buffer corresponding to failed SUs. if no,
    782 		 * malloc a temporary space of a sector in size for
    783 		 * destination of decoding. */
    784 		RF_ASSERT(nresults == 1 || nresults == 2);
    785 		if (nresults == 1) {
    786 			dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    787 			/* Always malloc temp buffer to dest[1]  */
    788 			RF_Malloc(dest[1], bytesPerSector, (char *));
    789 			bzero(dest[1], bytesPerSector);
    790 			mallc_two = 1;
    791 		} else {
    792 			if (fsuoff[0] <= sector && sector < fsuend[0])
    793 				dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    794 			else {
    795 				RF_Malloc(dest[0], bytesPerSector, (char *));
    796 				bzero(dest[0], bytesPerSector);
    797 				mallc_one = 1;
    798 			}
    799 			if (fsuoff[1] <= sector && sector < fsuend[1])
    800 				dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    801 			else {
    802 				RF_Malloc(dest[1], bytesPerSector, (char *));
    803 				bzero(dest[1], bytesPerSector);
    804 				mallc_two = 1;
    805 			}
    806 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    807 		}
    808 		pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    809 		ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    810 		/*
    811 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    812 	         * one sector to destination.
    813 	         */
    814 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    815 		/* free all allocated memory, and mark flag to indicate no
    816 		 * memory is being allocated */
    817 		if (mallc_one == 1)
    818 			RF_Free(dest[0], bytesPerSector);
    819 		if (mallc_two == 1)
    820 			RF_Free(dest[1], bytesPerSector);
    821 		mallc_one = mallc_two = 0;
    822 	}
    823 	RF_Free(buf, numDataCol * sizeof(char *));
    824 	if (ndataParam != 0) {
    825 		RF_Free(suoff, ndataParam * sizeof(long));
    826 		RF_Free(suend, ndataParam * sizeof(long));
    827 		RF_Free(prmToCol, ndataParam * sizeof(long));
    828 	}
    829 	RF_ETIMER_STOP(timer);
    830 	RF_ETIMER_EVAL(timer);
    831 	if (tracerec) {
    832 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    833 	}
    834 	rf_GenericWakeupFunc(node, 0);
    835 #if 1
    836 	return (0);		/* XXX is this even close!!?!?!!? GO */
    837 #endif
    838 }
    839 
    840 
    841 /* Currently, this function only supports access to one of the two failed SUs.
    842  * Also, asmap->numStripeUnitsAccessed is limited to one; RAIDframe breaks a large access into
    843  * many single-stripe-unit accesses.
    844  */
    845 
    846 int
    847 rf_EOWriteDoubleRecoveryFunc(node)
    848 	RF_DagNode_t *node;
    849 {
    850 	int     np = node->numParams;
    851 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    852 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    853 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    854 	RF_SectorNum_t sector;
    855 	RF_RowCol_t col, scol;
    856 	int     prm, i, j;
    857 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    858 	unsigned sosAddr;
    859 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    860 	RF_int64 numbytes;
    861 	RF_SectorNum_t startSector, endSector;
    862 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    863 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    864 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    865 				 * buffer storing data read from col0, col1,
    866 				 * col2 */
    867 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    868 	RF_Etimer_t timer;
    869 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    870 
    871 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    872 						 * case, the other failed SU
    873 						 * is not being accessed */
    874 	RF_ETIMER_START(timer);
    875 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    876 
    877 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    878 				 * and [1] are Ppda and Epda  */
    879 	epda = node->results[1];
    880 	fpda = asmap->failedPDAs[0];
    881 
    882 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    883 	/* determine the startSector and endSector for decoding */
    884 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    885 	endSector = startSector + fpda->numSector;
    886 	/* Assign buf[col] pointers to point to each non-failed colume  and
    887 	 * initialize the pbuf and ebuf to point at the beginning of each
    888 	 * source buffers and destination buffers */
    889 	for (prm = 0; prm < numDataCol - 2; prm++) {
    890 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    891 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    892 		buf[col] = pda->bufPtr;
    893 	}
    894 	/* pbuf and ebuf:  they will change values as double recovery decoding
    895 	 * goes on */
    896 	pbuf = ppda->bufPtr;
    897 	ebuf = epda->bufPtr;
    898 	/* find out the logical colume numbers in the encoding matrix of the
    899 	 * two failed columes */
    900 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    901 
    902 	/* find out the other failed colume not accessed this time */
    903 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    904 	for (i = 0; i < numDataCol; i++) {
    905 		npda.raidAddress = sosAddr + (i * secPerSU);
    906 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    907 		/* skip over dead disks */
    908 		if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    909 			if (i != fcol[0])
    910 				break;
    911 	}
    912 	RF_ASSERT(i < numDataCol);
    913 	fcol[1] = i;
    914 	/* assign temporary space to put recovered failed SU */
    915 	numbytes = fpda->numSector * bytesPerSector;
    916 	RF_Malloc(olddata[0], numbytes, (char *));
    917 	RF_Malloc(olddata[1], numbytes, (char *));
    918 	dest[0] = olddata[0];
    919 	dest[1] = olddata[1];
    920 	bzero(olddata[0], numbytes);
    921 	bzero(olddata[1], numbytes);
    922 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    923 	 * have already pointed at the beginning of each source buffers and
    924 	 * destination buffers */
    925 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    926 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    927 		for (j = 0; j < numDataCol; j++)
    928 			if ((j != fcol[0]) && (j != fcol[1]))
    929 				buf[j] += bytesPerSector;
    930 		dest[0] += bytesPerSector;
    931 		dest[1] += bytesPerSector;
    932 		ebuf += bytesPerSector;
    933 		pbuf += bytesPerSector;
    934 	}
    935 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    936 	 * data. With new writing data and this old data, use small write to
    937 	 * calculate the new redundant informations */
    938 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    939 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    940 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    941 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    942 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    943 	 * wudNodes; For current implementation, we assume the simplest case:
    944 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    945 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    946 	 * data to be writen to the failed disk. We first bxor the new data
    947 	 * into the old recovered data, then do the same things as small
    948 	 * write. */
    949 
    950 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
    951 	/* do new 'E' calculation  */
    952 	/* find out the corresponding colume in encoding matrix for write
    953 	 * colume to be encoded into redundant disk 'E' */
    954 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    955 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    956 	 * buffer pointer               */
    957 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    958 
    959 	/* do new 'P' calculation  */
    960 	rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
    961 	/* Free the allocated buffer  */
    962 	RF_Free(olddata[0], numbytes);
    963 	RF_Free(olddata[1], numbytes);
    964 	RF_Free(buf, numDataCol * sizeof(char *));
    965 
    966 	RF_ETIMER_STOP(timer);
    967 	RF_ETIMER_EVAL(timer);
    968 	if (tracerec) {
    969 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    970 	}
    971 	rf_GenericWakeupFunc(node, 0);
    972 	return (0);
    973 }
    974 #endif				/* RF_INCLUDE_EVENODD > 0 */
    975