Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.19
      1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.19 2008/11/18 14:29:55 ad Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: ChangMing Wu
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID-EVENODD  architecture.
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.19 2008/11/18 14:29:55 ad Exp $");
     35 
     36 #include "rf_archs.h"
     37 
     38 #ifdef _KERNEL_OPT
     39 #include "opt_raid_diagnostic.h"
     40 #endif
     41 
     42 #if RF_INCLUDE_EVENODD > 0
     43 
     44 #include <dev/raidframe/raidframevar.h>
     45 
     46 #include "rf_raid.h"
     47 #include "rf_dag.h"
     48 #include "rf_dagffrd.h"
     49 #include "rf_dagffwr.h"
     50 #include "rf_dagdegrd.h"
     51 #include "rf_dagdegwr.h"
     52 #include "rf_dagutils.h"
     53 #include "rf_dagfuncs.h"
     54 #include "rf_etimer.h"
     55 #include "rf_general.h"
     56 #include "rf_parityscan.h"
     57 #include "rf_evenodd.h"
     58 #include "rf_evenodd_dagfuncs.h"
     59 
     60 /* These redundant functions are for small write */
     61 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     62 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     63 /* These redundant functions are for degraded read */
     64 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     65 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
     66 /**********************************************************************************************
     67  *   The following encoding node function is used in EO_000_CreateLargeWriteDAG
     68  **********************************************************************************************/
     69 int
     70 rf_RegularPEFunc(node)
     71 	RF_DagNode_t *node;
     72 {
     73 	rf_RegularESubroutine(node, node->results[1]);
     74 	rf_RegularXorFunc(node);/* does the wakeup here! */
     75 #if 1
     76 	return (0);		/* XXX This was missing... GO */
     77 #endif
     78 }
     79 
     80 
     81 /************************************************************************************************
     82  *  For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and (ii) SimpleONEFunc() to
     83  *  be used. The former is used when the write accesses at least a full stripe unit of sectors.
     84  *  The latter is used when the write accesses two stripe units but with total sectors
     85  *  fewer than the sectors per SU. In this case, the accesses of parity and 'E' appear as disconnected
     86  *  areas in their stripe units, and the parity write and 'E' write are both divided into two distinct
     87  *  writes (four in total). The simple old-new write and regular old-new write happen as in RAID-5.
     88  ************************************************************************************************/
     89 
     90 /* Algorithm:
     91      1. Store the difference of old data and new data in the Rod buffer.
     92      2. then encode this buffer into the buffer which already have old 'E' information inside it,
     93 	the result can be shown to be the new 'E' information.
     94      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
     95    Here we have another alternative: to allocate a temporary buffer for storing the difference of
     96    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
     97    take the same speed as the previous, and need more memory.
     98 */
     99 int
    100 rf_RegularONEFunc(node)
    101 	RF_DagNode_t *node;
    102 {
    103 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    104 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    105 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
    106 								 * where you can find
    107 								 * e-pda */
    108 	int     i, k, retcode = 0;
    109 	int     suoffset, length;
    110 	RF_RowCol_t scol;
    111 	char   *srcbuf, *destbuf;
    112 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    113 	RF_Etimer_t timer;
    114 	RF_PhysDiskAddr_t *pda;
    115 #ifdef RAID_DIAGNOSTIC
    116 	RF_PhysDiskAddr_t *EPDA =
    117 	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    118 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
    119 #endif /* RAID_DIAGNOSTIC */
    120 
    121 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    122 	RF_ASSERT(ESUOffset == 0);
    123 
    124 	RF_ETIMER_START(timer);
    125 
    126 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    127 	 * new data is stored in Rod buffer */
    128 	for (k = 0; k < EpdaIndex; k += 2) {
    129 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    130 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
    131 	}
    132 	/* Start to encoding the buffer storing the difference of old data and
    133 	 * new data into 'E' buffer  */
    134 	for (i = 0; i < EpdaIndex; i += 2)
    135 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    136 									 * of E */
    137 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    138 			srcbuf = (char *) node->params[i + 1].p;
    139 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    140 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    141 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    142 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    143 		}
    144 	/* Recover the original old data to be used by parity encoding
    145 	 * function in XorNode */
    146 	for (k = 0; k < EpdaIndex; k += 2) {
    147 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    148 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
    149 	}
    150 	RF_ETIMER_STOP(timer);
    151 	RF_ETIMER_EVAL(timer);
    152 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    153 	rf_GenericWakeupFunc(node, 0);
    154 #if 1
    155 	return (0);		/* XXX this was missing.. GO */
    156 #endif
    157 }
    158 
    159 int
    160 rf_SimpleONEFunc(node)
    161 	RF_DagNode_t *node;
    162 {
    163 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    164 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    165 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    166 	int     retcode = 0;
    167 	char   *srcbuf, *destbuf;
    168 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    169 	int     length;
    170 	RF_RowCol_t scol;
    171 	RF_Etimer_t timer;
    172 
    173 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    174 	if (node->dagHdr->status == rf_enable) {
    175 		RF_ETIMER_START(timer);
    176 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    177 														 * writeDataNodes */
    178 		/* bxor to buffer of readDataNodes */
    179 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length);
    180 		/* find out the corresponding colume in encoding matrix for
    181 		 * write colume to be encoded into redundant disk 'E' */
    182 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    183 		srcbuf = node->params[1].p;
    184 		destbuf = node->params[3].p;
    185 		/* Start encoding process */
    186 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    187 		rf_bxor(node->params[5].p, node->params[1].p, length);
    188 		RF_ETIMER_STOP(timer);
    189 		RF_ETIMER_EVAL(timer);
    190 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    191 
    192 	}
    193 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    194 							 * explicitly since no
    195 							 * I/O in this node */
    196 }
    197 
    198 
    199 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    200 void
    201 rf_RegularESubroutine(node, ebuf)
    202 	RF_DagNode_t *node;
    203 	char   *ebuf;
    204 {
    205 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    206 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    207 	RF_PhysDiskAddr_t *pda;
    208 	int     i, suoffset;
    209 	RF_RowCol_t scol;
    210 	char   *srcbuf, *destbuf;
    211 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    212 	RF_Etimer_t timer;
    213 
    214 	RF_ETIMER_START(timer);
    215 	for (i = 0; i < node->numParams - 2; i += 2) {
    216 		RF_ASSERT(node->params[i + 1].p != ebuf);
    217 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    218 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    219 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    220 		srcbuf = (char *) node->params[i + 1].p;
    221 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    222 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    223 	}
    224 	RF_ETIMER_STOP(timer);
    225 	RF_ETIMER_EVAL(timer);
    226 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    227 }
    228 
    229 
    230 /*******************************************************************************************
    231  *			 Used in  EO_001_CreateLargeWriteDAG
    232  ******************************************************************************************/
    233 int
    234 rf_RegularEFunc(node)
    235 	RF_DagNode_t *node;
    236 {
    237 	rf_RegularESubroutine(node, node->results[0]);
    238 	rf_GenericWakeupFunc(node, 0);
    239 #if 1
    240 	return (0);		/* XXX this was missing?.. GO */
    241 #endif
    242 }
    243 /*******************************************************************************************
    244  * This degraded function allows only two cases:
    245  *  1. when the write accesses the full failed stripe unit; then the access can span more than
    246  *     one stripe unit.
    247  *  2. when the write accesses only part of the failed SU; we assume accesses of more than
    248  *     one stripe unit are not allowed, so that the write can be dealt with like a
    249  *     large write.
    250  *  The following function is based on these assumptions. So except in the second case,
    251  *  it looks the same as a large-write encoding function. But this is not exactly the
    252  *  normal way of doing a degraded write, since RAIDframe has to break accesses
    253  *  other than the above two cases into smaller accesses. We may have to change
    254  *  rf_DegrESubroutine in the future.
    255  *******************************************************************************************/
    256 void
    257 rf_DegrESubroutine(node, ebuf)
    258 	RF_DagNode_t *node;
    259 	char   *ebuf;
    260 {
    261 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    262 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    263 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    264 	RF_PhysDiskAddr_t *pda;
    265 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    266 	RF_RowCol_t scol;
    267 	char   *srcbuf, *destbuf;
    268 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    269 	RF_Etimer_t timer;
    270 
    271 	RF_ETIMER_START(timer);
    272 	for (i = 0; i < node->numParams - 2; i += 2) {
    273 		RF_ASSERT(node->params[i + 1].p != ebuf);
    274 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    275 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    276 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    277 		srcbuf = (char *) node->params[i + 1].p;
    278 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    279 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    280 	}
    281 
    282 	RF_ETIMER_STOP(timer);
    283 	RF_ETIMER_EVAL(timer);
    284 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    285 }
    286 
    287 
    288 /**************************************************************************************
    289  * This function is used in case where one data disk failed and both redundant disks
    290  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    291  * failed in the stripe but not accessed at this time, then we should, instead, use
    292  * the rf_EOWriteDoubleRecoveryFunc().
    293  **************************************************************************************/
    294 int
    295 rf_Degraded_100_EOFunc(node)
    296 	RF_DagNode_t *node;
    297 {
    298 	rf_DegrESubroutine(node, node->results[1]);
    299 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    300 #if 1
    301 	return (0);		/* XXX this was missing... SHould these be
    302 				 * void functions??? GO */
    303 #endif
    304 }
    305 /**************************************************************************************
    306  * This function is to encode one sector in one of the data disks to the E disk.
    307  * However, in evenodd this function can also be used as decoding function to recover
    308  * data from dead disk in the case of parity failure and a single data failure.
    309  **************************************************************************************/
    310 void
    311 rf_e_EncOneSect(
    312     RF_RowCol_t srcLogicCol,
    313     char *srcSecbuf,
    314     RF_RowCol_t destLogicCol,
    315     char *destSecbuf,
    316     int bytesPerSector)
    317 {
    318 	int     S_index;	/* index of the EU in the src col which need
    319 				 * be Xored into all EUs in a dest sector */
    320 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    321 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    322 					 * the destination colume of encoding
    323 					 * matrix */
    324 	        indexInSrc;	/* row index of an encoding unit in the source
    325 				 * colume used for recovery */
    326 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    327 
    328 #if RF_EO_MATRIX_DIM > 17
    329 	int     shortsPerEU = bytesPerEU / sizeof(short);
    330 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    331 	short temp1;
    332 #elif RF_EO_MATRIX_DIM == 17
    333 	int     longsPerEU = bytesPerEU / sizeof(long);
    334 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    335 	long temp1;
    336 #endif
    337 
    338 #if RF_EO_MATRIX_DIM > 17
    339 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    340 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    341 #elif RF_EO_MATRIX_DIM == 17
    342 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    343 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    344 #endif
    345 
    346 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    347 #if RF_EO_MATRIX_DIM > 17
    348 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    349 #elif RF_EO_MATRIX_DIM == 17
    350 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    351 #endif
    352 
    353 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    354 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    355 
    356 #if RF_EO_MATRIX_DIM > 17
    357 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    358 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    359 		for (j = 0; j < shortsPerEU; j++) {
    360 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    361 			/* note: S_index won't be at the end row for any src
    362 			 * col! */
    363 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    364 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    365 			/* if indexInSrc is at the end row, ie.
    366 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    367 			else
    368 				destShortBuf[j] = temp1;
    369 		}
    370 
    371 #elif RF_EO_MATRIX_DIM == 17
    372 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    373 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    374 		for (j = 0; j < longsPerEU; j++) {
    375 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    376 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    377 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    378 			else
    379 				destLongBuf[j] = temp1;
    380 		}
    381 #endif
    382 	}
    383 }
    384 
    385 void
    386 rf_e_encToBuf(
    387     RF_Raid_t * raidPtr,
    388     RF_RowCol_t srcLogicCol,
    389     char *srcbuf,
    390     RF_RowCol_t destLogicCol,
    391     char *destbuf,
    392     int numSector)
    393 {
    394 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    395 
    396 	for (i = 0; i < numSector; i++) {
    397 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    398 		srcbuf += bytesPerSector;
    399 		destbuf += bytesPerSector;
    400 	}
    401 }
    402 /**************************************************************************************
    403  * When the parity disk and one data disk have failed, we use the second redundant
    404  * information, 'E', to recover the data of the dead disk. This function is used in the
    405  * recovery node for EO_110_CreateReadDAG
    406  **************************************************************************************/
    407 int
    408 rf_RecoveryEFunc(node)
    409 	RF_DagNode_t *node;
    410 {
    411 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    412 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    413 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    414 	RF_RowCol_t scol,	/* source logical column */
    415 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    416 									 * failed SU */
    417 	int     i;
    418 	RF_PhysDiskAddr_t *pda;
    419 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    420 	char   *srcbuf, *destbuf;
    421 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    422 	RF_Etimer_t timer;
    423 
    424 	memset((char *) node->results[0], 0,
    425 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    426 	if (node->dagHdr->status == rf_enable) {
    427 		RF_ETIMER_START(timer);
    428 		for (i = 0; i < node->numParams - 2; i += 2)
    429 			if (node->params[i + 1].p != node->results[0]) {
    430 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    431 				if (i == node->numParams - 4)
    432 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    433 									 * redundant E */
    434 				else
    435 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    436 				srcbuf = (char *) node->params[i + 1].p;
    437 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    438 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    439 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    440 			}
    441 		RF_ETIMER_STOP(timer);
    442 		RF_ETIMER_EVAL(timer);
    443 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    444 	}
    445 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    446 }
    447 /**************************************************************************************
    448  * This function is used in the case where one data disk and the parity disk have failed.
    449  * (in EO_110_CreateWriteDAG )
    450  **************************************************************************************/
    451 int
    452 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    453 {
    454 	rf_DegrESubroutine(node, node->results[0]);
    455 	rf_GenericWakeupFunc(node, 0);
    456 #if 1
    457 	return (0);		/* XXX Yet another one!! GO */
    458 #endif
    459 }
    460 
    461 
    462 
    463 /**************************************************************************************
    464  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    465  **************************************************************************************/
    466 
    467 void
    468 rf_doubleEOdecode(
    469     RF_Raid_t * raidPtr,
    470     char **rrdbuf,
    471     char **dest,
    472     RF_RowCol_t * fcol,
    473     char *pbuf,
    474     char *ebuf)
    475 {
    476 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    477 	int     i, j, k, f1, f2, row;
    478 	int     rrdrow, erow, count = 0;
    479 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    480 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    481 #if 0
    482 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    483 #endif
    484 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    485 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    486 	int     numDataCol = layoutPtr->numDataCol;
    487 #if RF_EO_MATRIX_DIM > 17
    488 	int     shortsPerEU = bytesPerEU / sizeof(short);
    489 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    490 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    491 	short *temp;
    492 	short  *P;
    493 
    494 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    495 	RF_Malloc(P, bytesPerEU, (short *));
    496 	RF_Malloc(temp, bytesPerEU, (short *));
    497 #elif RF_EO_MATRIX_DIM == 17
    498 	int     longsPerEU = bytesPerEU / sizeof(long);
    499 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    500 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    501 	long *temp;
    502 	long   *P;
    503 
    504 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    505 	RF_Malloc(P, bytesPerEU, (long *));
    506 	RF_Malloc(temp, bytesPerEU, (long *));
    507 #endif
    508 	RF_ASSERT(*((long *) dest[0]) == 0);
    509 	RF_ASSERT(*((long *) dest[1]) == 0);
    510 	memset((char *) P, 0, bytesPerEU);
    511 	memset((char *) temp, 0, bytesPerEU);
    512 	RF_ASSERT(*P == 0);
    513 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    514 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    515 	 * the Ref. paper by Blaum, et al 1993  */
    516 	for (i = 0; i < numRowInEncMatix; i++)
    517 		for (k = 0; k < longsPerEU; k++) {
    518 #if RF_EO_MATRIX_DIM > 17
    519 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    520 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    521 #elif RF_EO_MATRIX_DIM == 17
    522 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    523 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    524 #endif
    525 			P[k] ^= *ebuf_current;
    526 			P[k] ^= *pbuf_current;
    527 		}
    528 	RF_ASSERT(fcol[0] != fcol[1]);
    529 	if (fcol[0] < fcol[1]) {
    530 #if RF_EO_MATRIX_DIM > 17
    531 		dest_smaller = (short *) (dest[0]);
    532 		dest_larger = (short *) (dest[1]);
    533 #elif RF_EO_MATRIX_DIM == 17
    534 		dest_smaller = (long *) (dest[0]);
    535 		dest_larger = (long *) (dest[1]);
    536 #endif
    537 		f1 = fcol[0];
    538 		f2 = fcol[1];
    539 	} else {
    540 #if RF_EO_MATRIX_DIM > 17
    541 		dest_smaller = (short *) (dest[1]);
    542 		dest_larger = (short *) (dest[0]);
    543 #elif RF_EO_MATRIX_DIM == 17
    544 		dest_smaller = (long *) (dest[1]);
    545 		dest_larger = (long *) (dest[0]);
    546 #endif
    547 		f1 = fcol[1];
    548 		f2 = fcol[0];
    549 	}
    550 	row = (RF_EO_MATRIX_DIM) - 1;
    551 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    552 #if RF_EO_MATRIX_DIM > 17
    553 		dest_larger_current = dest_larger + row * shortsPerEU;
    554 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    555 #elif RF_EO_MATRIX_DIM == 17
    556 		dest_larger_current = dest_larger + row * longsPerEU;
    557 		dest_smaller_current = dest_smaller + row * longsPerEU;
    558 #endif
    559 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    560 		       which is the failed data in the colume which has smaller col index. **/
    561 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    562 		for (j = 0; j < numDataCol; j++) {
    563 			if (j == f1 || j == f2)
    564 				continue;
    565 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    566 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    567 #if RF_EO_MATRIX_DIM > 17
    568 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    569 				for (k = 0; k < shortsPerEU; k++)
    570 					temp[k] ^= *(rrdbuf_current + k);
    571 #elif RF_EO_MATRIX_DIM == 17
    572 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    573 				for (k = 0; k < longsPerEU; k++)
    574 					temp[k] ^= *(rrdbuf_current + k);
    575 #endif
    576 			}
    577 		}
    578 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    579 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    580 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    581 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    582 		 * diagonal) ^ (failed 2)       */
    583 
    584 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    585 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    586 #if RF_EO_MATRIX_DIM > 17
    587 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    588 			for (k = 0; k < shortsPerEU; k++)
    589 				temp[k] ^= *(ebuf_current + k);
    590 #elif RF_EO_MATRIX_DIM == 17
    591 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    592 			for (k = 0; k < longsPerEU; k++)
    593 				temp[k] ^= *(ebuf_current + k);
    594 #endif
    595 		}
    596 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    597 		 * proved to be actually  (principle diagonal)  After this
    598 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    599 #if RF_EO_MATRIX_DIM > 17
    600 		for (k = 0; k < shortsPerEU; k++)
    601 			temp[k] ^= P[k];
    602 		/* Put the data to the destination buffer                              */
    603 		for (k = 0; k < shortsPerEU; k++)
    604 			dest_larger_current[k] = temp[k];
    605 #elif RF_EO_MATRIX_DIM == 17
    606 		for (k = 0; k < longsPerEU; k++)
    607 			temp[k] ^= P[k];
    608 		/* Put the data to the destination buffer                              */
    609 		for (k = 0; k < longsPerEU; k++)
    610 			dest_larger_current[k] = temp[k];
    611 #endif
    612 
    613 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    614 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    615 		 * columes    */
    616 		for (j = 0; j < numDataCol; j++) {
    617 			if (j == f1 || j == f2)
    618 				continue;
    619 #if RF_EO_MATRIX_DIM > 17
    620 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    621 			for (k = 0; k < shortsPerEU; k++)
    622 				temp[k] ^= *(rrdbuf_current + k);
    623 #elif RF_EO_MATRIX_DIM == 17
    624 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    625 			for (k = 0; k < longsPerEU; k++)
    626 				temp[k] ^= *(rrdbuf_current + k);
    627 #endif
    628 		}
    629 		/* step 2: ^A(row,m-1) */
    630 		/* step 3: Put the data to the destination buffer                             	 */
    631 #if RF_EO_MATRIX_DIM > 17
    632 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    633 		for (k = 0; k < shortsPerEU; k++)
    634 			temp[k] ^= *(pbuf_current + k);
    635 		for (k = 0; k < shortsPerEU; k++)
    636 			dest_smaller_current[k] = temp[k];
    637 #elif RF_EO_MATRIX_DIM == 17
    638 		pbuf_current = (long *) pbuf + longsPerEU * row;
    639 		for (k = 0; k < longsPerEU; k++)
    640 			temp[k] ^= *(pbuf_current + k);
    641 		for (k = 0; k < longsPerEU; k++)
    642 			dest_smaller_current[k] = temp[k];
    643 #endif
    644 		count++;
    645 	}
    646 	/* Check if all Encoding Unit in the data buffer have been decoded,
    647 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    648 	 * this algorithm will covered all buffer 				 */
    649 	RF_ASSERT(count == numRowInEncMatix);
    650 	RF_Free((char *) P, bytesPerEU);
    651 	RF_Free((char *) temp, bytesPerEU);
    652 }
    653 
    654 
    655 /***************************************************************************************
    656 * 	This function is called by double degraded read
    657 * 	EO_200_CreateReadDAG
    658 *
    659 ***************************************************************************************/
    660 int
    661 rf_EvenOddDoubleRecoveryFunc(node)
    662 	RF_DagNode_t *node;
    663 {
    664 	int     ndataParam = 0;
    665 	int     np = node->numParams;
    666 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    667 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    668 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    669 	int     i, prm, sector, nresults = node->numResults;
    670 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    671 	unsigned sosAddr;
    672 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    673 							 * memory is allocated */
    674 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    675 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    676 	        npda;
    677 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    678 	char  **buf, *ebuf, *pbuf, *dest[2];
    679 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL,
    680 	    psuoff = 0, esuoff = 0;
    681 	RF_SectorNum_t startSector, endSector;
    682 	RF_Etimer_t timer;
    683 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    684 
    685 	RF_ETIMER_START(timer);
    686 
    687 	/* Find out the number of parameters which are pdas for data
    688 	 * information */
    689 	for (i = 0; i <= np; i++)
    690 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    691 			ndataParam = i;
    692 			break;
    693 		}
    694 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    695 	if (ndataParam != 0) {
    696 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
    697 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
    698 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
    699 	}
    700 	if (asmap->failedPDAs[1] &&
    701 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    702 		RF_ASSERT(0);	/* currently, no support for this situation */
    703 		ppda = node->params[np - 6].p;
    704 		ppda2 = node->params[np - 5].p;
    705 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    706 		epda = node->params[np - 4].p;
    707 		epda2 = node->params[np - 3].p;
    708 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    709 		two = 1;
    710 	} else {
    711 		ppda = node->params[np - 4].p;
    712 		epda = node->params[np - 3].p;
    713 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    714 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    715 		RF_ASSERT(psuoff == esuoff);
    716 	}
    717 	/*
    718             the followings have three goals:
    719             1. determine the startSector to begin decoding and endSector to end decoding.
    720             2. determine the colume numbers of the two failed disks.
    721             3. determine the offset and end offset of the access within each failed stripe unit.
    722          */
    723 	if (nresults == 1) {
    724 		/* find the startSector to begin decoding */
    725 		pda = node->results[0];
    726 		memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
    727 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    728 		fsuend[0] = fsuoff[0] + pda->numSector;
    729 		fsuoff[1] = 0;
    730 		fsuend[1] = 0;
    731 		startSector = fsuoff[0];
    732 		endSector = fsuend[0];
    733 
    734 		/* find out the column of failed disk being accessed */
    735 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    736 
    737 		/* find out the other failed colume not accessed */
    738 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    739 		for (i = 0; i < numDataCol; i++) {
    740 			npda.raidAddress = sosAddr + (i * secPerSU);
    741 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
    742 			/* skip over dead disks */
    743 			if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
    744 				if (i != fcol[0])
    745 					break;
    746 		}
    747 		RF_ASSERT(i < numDataCol);
    748 		fcol[1] = i;
    749 	} else {
    750 		RF_ASSERT(nresults == 2);
    751 		pda0 = node->results[0];
    752 		memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
    753 		pda1 = node->results[1];
    754 		memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
    755 		/* determine the failed colume numbers of the two failed
    756 		 * disks. */
    757 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    758 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    759 		/* determine the offset and end offset of the access within
    760 		 * each failed stripe unit. */
    761 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    762 		fsuend[0] = fsuoff[0] + pda0->numSector;
    763 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    764 		fsuend[1] = fsuoff[1] + pda1->numSector;
    765 		/* determine the startSector to begin decoding */
    766 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    767 		/* determine the endSector to end decoding */
    768 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    769 	}
    770 	/*
    771 	      assign the beginning sector and the end sector for each parameter
    772 	      find out the corresponding colume # for each parameter
    773         */
    774 	for (prm = 0; prm < ndataParam; prm++) {
    775 		pda = node->params[prm].p;
    776 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    777 		suend[prm] = suoff[prm] + pda->numSector;
    778 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    779 	}
    780 	/* 'sector' is the sector for the current decoding algorithm. For each
    781 	 * sector in the failed SU, find out the corresponding parameters that
    782 	 * cover the current sector and that are needed for decoding of this
    783 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    784 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    785 	 * size. */
    786 	for (sector = startSector; sector < endSector; sector++) {
    787 		if (nresults == 2)
    788 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    789 				continue;
    790 		for (prm = 0; prm < ndataParam; prm++)
    791 			if (suoff[prm] <= sector && sector < suend[prm])
    792 				buf[(prmToCol[prm])] = (char *)((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    793 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    794 		/* find out if sector is in the shadow of any accessed failed
    795 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    796 		 * position of the buffer corresponding to failed SUs. if no,
    797 		 * malloc a temporary space of a sector in size for
    798 		 * destination of decoding. */
    799 		RF_ASSERT(nresults == 1 || nresults == 2);
    800 		if (nresults == 1) {
    801 			dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    802 			/* Always malloc temp buffer to dest[1]  */
    803 			RF_Malloc(dest[1], bytesPerSector, (char *));
    804 			memset(dest[1], 0, bytesPerSector);
    805 			mallc_two = 1;
    806 		} else {
    807 			if (fsuoff[0] <= sector && sector < fsuend[0])
    808 				dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    809 			else {
    810 				RF_Malloc(dest[0], bytesPerSector, (char *));
    811 				memset(dest[0], 0, bytesPerSector);
    812 				mallc_one = 1;
    813 			}
    814 			if (fsuoff[1] <= sector && sector < fsuend[1])
    815 				dest[1] = (char *)((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    816 			else {
    817 				RF_Malloc(dest[1], bytesPerSector, (char *));
    818 				memset(dest[1], 0, bytesPerSector);
    819 				mallc_two = 1;
    820 			}
    821 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    822 		}
    823 		pbuf = (char *)ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    824 		ebuf = (char *)epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    825 		/*
    826 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    827 	         * one sector to destination.
    828 	         */
    829 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    830 		/* free all allocated memory, and mark flag to indicate no
    831 		 * memory is being allocated */
    832 		if (mallc_one == 1)
    833 			RF_Free(dest[0], bytesPerSector);
    834 		if (mallc_two == 1)
    835 			RF_Free(dest[1], bytesPerSector);
    836 		mallc_one = mallc_two = 0;
    837 	}
    838 	RF_Free(buf, numDataCol * sizeof(char *));
    839 	if (ndataParam != 0) {
    840 		RF_Free(suoff, ndataParam * sizeof(long));
    841 		RF_Free(suend, ndataParam * sizeof(long));
    842 		RF_Free(prmToCol, ndataParam * sizeof(long));
    843 	}
    844 	RF_ETIMER_STOP(timer);
    845 	RF_ETIMER_EVAL(timer);
    846 	if (tracerec) {
    847 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    848 	}
    849 	rf_GenericWakeupFunc(node, 0);
    850 #if 1
    851 	return (0);		/* XXX is this even close!!?!?!!? GO */
    852 #endif
    853 }
    854 
    855 
    856 /* currently, only access of one of the two failed SU is allowed in this function.
    857  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    858  * many accesses of single stripe unit.
    859  */
    860 
    861 int
    862 rf_EOWriteDoubleRecoveryFunc(node)
    863 	RF_DagNode_t *node;
    864 {
    865 	int     np = node->numParams;
    866 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    867 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    868 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    869 	RF_SectorNum_t sector;
    870 	RF_RowCol_t col, scol;
    871 	int     prm, i, j;
    872 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    873 	unsigned sosAddr;
    874 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    875 	RF_int64 numbytes;
    876 	RF_SectorNum_t startSector, endSector;
    877 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    878 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    879 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    880 				 * buffer storing data read from col0, col1,
    881 				 * col2 */
    882 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    883 	RF_Etimer_t timer;
    884 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    885 
    886 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    887 						 * case, the other failed SU
    888 						 * is not being accessed */
    889 	RF_ETIMER_START(timer);
    890 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    891 
    892 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    893 				 * and [1] are Ppda and Epda  */
    894 	epda = node->results[1];
    895 	fpda = asmap->failedPDAs[0];
    896 
    897 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    898 	/* determine the startSector and endSector for decoding */
    899 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    900 	endSector = startSector + fpda->numSector;
    901 	/* Assign buf[col] pointers to point to each non-failed colume  and
    902 	 * initialize the pbuf and ebuf to point at the beginning of each
    903 	 * source buffers and destination buffers */
    904 	for (prm = 0; prm < numDataCol - 2; prm++) {
    905 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    906 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    907 		buf[col] = pda->bufPtr;
    908 	}
    909 	/* pbuf and ebuf:  they will change values as double recovery decoding
    910 	 * goes on */
    911 	pbuf = ppda->bufPtr;
    912 	ebuf = epda->bufPtr;
    913 	/* find out the logical colume numbers in the encoding matrix of the
    914 	 * two failed columes */
    915 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    916 
    917 	/* find out the other failed colume not accessed this time */
    918 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    919 	for (i = 0; i < numDataCol; i++) {
    920 		npda.raidAddress = sosAddr + (i * secPerSU);
    921 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
    922 		/* skip over dead disks */
    923 		if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
    924 			if (i != fcol[0])
    925 				break;
    926 	}
    927 	RF_ASSERT(i < numDataCol);
    928 	fcol[1] = i;
    929 	/* assign temporary space to put recovered failed SU */
    930 	numbytes = fpda->numSector * bytesPerSector;
    931 	RF_Malloc(olddata[0], numbytes, (char *));
    932 	RF_Malloc(olddata[1], numbytes, (char *));
    933 	dest[0] = olddata[0];
    934 	dest[1] = olddata[1];
    935 	memset(olddata[0], 0, numbytes);
    936 	memset(olddata[1], 0, numbytes);
    937 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    938 	 * have already pointed at the beginning of each source buffers and
    939 	 * destination buffers */
    940 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    941 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    942 		for (j = 0; j < numDataCol; j++)
    943 			if ((j != fcol[0]) && (j != fcol[1]))
    944 				buf[j] += bytesPerSector;
    945 		dest[0] += bytesPerSector;
    946 		dest[1] += bytesPerSector;
    947 		ebuf += bytesPerSector;
    948 		pbuf += bytesPerSector;
    949 	}
    950 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    951 	 * data. With new writing data and this old data, use small write to
    952 	 * calculate the new redundant informations */
    953 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    954 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    955 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    956 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    957 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    958 	 * wudNodes; For current implementation, we assume the simplest case:
    959 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    960 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    961 	 * data to be writen to the failed disk. We first bxor the new data
    962 	 * into the old recovered data, then do the same things as small
    963 	 * write. */
    964 
    965 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes);
    966 	/* do new 'E' calculation  */
    967 	/* find out the corresponding colume in encoding matrix for write
    968 	 * colume to be encoded into redundant disk 'E' */
    969 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    970 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    971 	 * buffer pointer               */
    972 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    973 
    974 	/* do new 'P' calculation  */
    975 	rf_bxor(olddata[0], ppda->bufPtr, numbytes);
    976 	/* Free the allocated buffer  */
    977 	RF_Free(olddata[0], numbytes);
    978 	RF_Free(olddata[1], numbytes);
    979 	RF_Free(buf, numDataCol * sizeof(char *));
    980 
    981 	RF_ETIMER_STOP(timer);
    982 	RF_ETIMER_EVAL(timer);
    983 	if (tracerec) {
    984 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    985 	}
    986 	rf_GenericWakeupFunc(node, 0);
    987 	return (0);
    988 }
    989 #endif				/* RF_INCLUDE_EVENODD > 0 */
    990