Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.18
      1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.18 2007/03/04 06:02:38 christos Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: ChangMing Wu
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID-EVENODD  architecture.
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.18 2007/03/04 06:02:38 christos Exp $");
     35 
     36 #include "rf_archs.h"
     37 #include "opt_raid_diagnostic.h"
     38 
     39 #if RF_INCLUDE_EVENODD > 0
     40 
     41 #include <dev/raidframe/raidframevar.h>
     42 
     43 #include "rf_raid.h"
     44 #include "rf_dag.h"
     45 #include "rf_dagffrd.h"
     46 #include "rf_dagffwr.h"
     47 #include "rf_dagdegrd.h"
     48 #include "rf_dagdegwr.h"
     49 #include "rf_dagutils.h"
     50 #include "rf_dagfuncs.h"
     51 #include "rf_etimer.h"
     52 #include "rf_general.h"
     53 #include "rf_parityscan.h"
     54 #include "rf_evenodd.h"
     55 #include "rf_evenodd_dagfuncs.h"
     56 
     57 /* These redundant functions are for small write */
     58 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     59 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     60 /* These redundant functions are for degraded read */
     61 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     62 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
     63 /**********************************************************************************************
     64  *   The following encoding node function is used in EO_000_CreateLargeWriteDAG
     65  **********************************************************************************************/
     66 int
     67 rf_RegularPEFunc(node)
     68 	RF_DagNode_t *node;
     69 {
     70 	rf_RegularESubroutine(node, node->results[1]);
     71 	rf_RegularXorFunc(node);/* does the wakeup here! */
     72 #if 1
     73 	return (0);		/* XXX This was missing... GO */
     74 #endif
     75 }
     76 
     77 
     78 /************************************************************************************************
     79  *  For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and (ii) SimpleONEFunc() to
     80  *  be used. The former is used when the write accesses at least a full stripe unit of sectors.
     81  *  The latter is used when the write accesses two stripe units but with total sectors
     82  *  fewer than the sectors per SU. In this case, the accesses of parity and 'E' are disconnected
     83  *  areas in their stripe units, and the parity write and 'E' write are each divided into two
     84  *  distinct writes (four in total). This simple old-new write and regular old-new write happen as in RAID-5.
     85  ************************************************************************************************/
     86 
     87 /* Algorithm:
     88      1. Store the difference of old data and new data in the Rod buffer.
     89      2. then encode this buffer into the buffer which already have old 'E' information inside it,
     90 	the result can be shown to be the new 'E' information.
     91      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
     92    Here we have another alternative: to allocate a temporary buffer for storing the difference of
     93    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
     94    take the same speed as the previous, and need more memory.
     95 */
     96 int
     97 rf_RegularONEFunc(node)
     98 	RF_DagNode_t *node;
     99 {
    100 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    101 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    102 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
    103 								 * where you can find
    104 								 * e-pda */
    105 	int     i, k, retcode = 0;
    106 	int     suoffset, length;
    107 	RF_RowCol_t scol;
    108 	char   *srcbuf, *destbuf;
    109 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    110 	RF_Etimer_t timer;
    111 	RF_PhysDiskAddr_t *pda;
    112 #ifdef RAID_DIAGNOSTIC
    113 	RF_PhysDiskAddr_t *EPDA =
    114 	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    115 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
    116 #endif /* RAID_DIAGNOSTIC */
    117 
    118 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    119 	RF_ASSERT(ESUOffset == 0);
    120 
    121 	RF_ETIMER_START(timer);
    122 
    123 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    124 	 * new data is stored in Rod buffer */
    125 	for (k = 0; k < EpdaIndex; k += 2) {
    126 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    127 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
    128 	}
    129 	/* Start to encoding the buffer storing the difference of old data and
    130 	 * new data into 'E' buffer  */
    131 	for (i = 0; i < EpdaIndex; i += 2)
    132 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    133 									 * of E */
    134 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    135 			srcbuf = (char *) node->params[i + 1].p;
    136 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    137 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    138 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    139 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    140 		}
    141 	/* Recover the original old data to be used by parity encoding
    142 	 * function in XorNode */
    143 	for (k = 0; k < EpdaIndex; k += 2) {
    144 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    145 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
    146 	}
    147 	RF_ETIMER_STOP(timer);
    148 	RF_ETIMER_EVAL(timer);
    149 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    150 	rf_GenericWakeupFunc(node, 0);
    151 #if 1
    152 	return (0);		/* XXX this was missing.. GO */
    153 #endif
    154 }
    155 
    156 int
    157 rf_SimpleONEFunc(node)
    158 	RF_DagNode_t *node;
    159 {
    160 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    161 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    162 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    163 	int     retcode = 0;
    164 	char   *srcbuf, *destbuf;
    165 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    166 	int     length;
    167 	RF_RowCol_t scol;
    168 	RF_Etimer_t timer;
    169 
    170 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    171 	if (node->dagHdr->status == rf_enable) {
    172 		RF_ETIMER_START(timer);
    173 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    174 														 * writeDataNodes */
    175 		/* bxor to buffer of readDataNodes */
    176 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length);
    177 		/* find out the corresponding colume in encoding matrix for
    178 		 * write colume to be encoded into redundant disk 'E' */
    179 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    180 		srcbuf = node->params[1].p;
    181 		destbuf = node->params[3].p;
    182 		/* Start encoding process */
    183 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    184 		rf_bxor(node->params[5].p, node->params[1].p, length);
    185 		RF_ETIMER_STOP(timer);
    186 		RF_ETIMER_EVAL(timer);
    187 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    188 
    189 	}
    190 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    191 							 * explicitly since no
    192 							 * I/O in this node */
    193 }
    194 
    195 
    196 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    197 void
    198 rf_RegularESubroutine(node, ebuf)
    199 	RF_DagNode_t *node;
    200 	char   *ebuf;
    201 {
    202 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    203 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    204 	RF_PhysDiskAddr_t *pda;
    205 	int     i, suoffset;
    206 	RF_RowCol_t scol;
    207 	char   *srcbuf, *destbuf;
    208 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    209 	RF_Etimer_t timer;
    210 
    211 	RF_ETIMER_START(timer);
    212 	for (i = 0; i < node->numParams - 2; i += 2) {
    213 		RF_ASSERT(node->params[i + 1].p != ebuf);
    214 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    215 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    216 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    217 		srcbuf = (char *) node->params[i + 1].p;
    218 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    219 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    220 	}
    221 	RF_ETIMER_STOP(timer);
    222 	RF_ETIMER_EVAL(timer);
    223 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    224 }
    225 
    226 
    227 /*******************************************************************************************
    228  *			 Used in  EO_001_CreateLargeWriteDAG
    229  ******************************************************************************************/
    230 int
    231 rf_RegularEFunc(node)
    232 	RF_DagNode_t *node;
    233 {
    234 	rf_RegularESubroutine(node, node->results[0]);
    235 	rf_GenericWakeupFunc(node, 0);
    236 #if 1
    237 	return (0);		/* XXX this was missing?.. GO */
    238 #endif
    239 }
    240 /*******************************************************************************************
    241  * This degraded function allows only two cases:
    242  *  1. when the write accesses the full failed stripe unit, then the access can be more than
    243  *     one stripe unit.
    244  *  2. when the write accesses only part of the failed SU, we assume accesses of more than
    245  *     one stripe unit are not allowed so that the write can be dealt with like a
    246  *     large write.
    247  *  The following function is based on these assumptions. So except in the second case,
    248  *  it looks the same as a large write encoding function. But this is not exactly the
    249  *  normal way of doing a degraded write, since raidframe has to break cases of access
    250  *  other than the above two into smaller accesses. We may have to change
    251  *  DegrESubroutine in the future.
    252  *******************************************************************************************/
    253 void
    254 rf_DegrESubroutine(node, ebuf)
    255 	RF_DagNode_t *node;
    256 	char   *ebuf;
    257 {
    258 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    259 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    260 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    261 	RF_PhysDiskAddr_t *pda;
    262 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    263 	RF_RowCol_t scol;
    264 	char   *srcbuf, *destbuf;
    265 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    266 	RF_Etimer_t timer;
    267 
    268 	RF_ETIMER_START(timer);
    269 	for (i = 0; i < node->numParams - 2; i += 2) {
    270 		RF_ASSERT(node->params[i + 1].p != ebuf);
    271 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    272 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    273 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    274 		srcbuf = (char *) node->params[i + 1].p;
    275 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    276 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    277 	}
    278 
    279 	RF_ETIMER_STOP(timer);
    280 	RF_ETIMER_EVAL(timer);
    281 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    282 }
    283 
    284 
    285 /**************************************************************************************
    286  * This function is used in case where one data disk failed and both redundant disks
    287  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    288  * failed in the stripe but not accessed at this time, then we should, instead, use
    289  * the rf_EOWriteDoubleRecoveryFunc().
    290  **************************************************************************************/
    291 int
    292 rf_Degraded_100_EOFunc(node)
    293 	RF_DagNode_t *node;
    294 {
    295 	rf_DegrESubroutine(node, node->results[1]);
    296 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    297 #if 1
    298 	return (0);		/* XXX this was missing... SHould these be
    299 				 * void functions??? GO */
    300 #endif
    301 }
    302 /**************************************************************************************
    303  * This function is to encode one sector in one of the data disks to the E disk.
    304  * However, in evenodd this function can also be used as decoding function to recover
    305  * data from dead disk in the case of parity failure and a single data failure.
    306  **************************************************************************************/
    307 void
    308 rf_e_EncOneSect(
    309     RF_RowCol_t srcLogicCol,
    310     char *srcSecbuf,
    311     RF_RowCol_t destLogicCol,
    312     char *destSecbuf,
    313     int bytesPerSector)
    314 {
    315 	int     S_index;	/* index of the EU in the src col which need
    316 				 * be Xored into all EUs in a dest sector */
    317 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    318 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    319 					 * the destination colume of encoding
    320 					 * matrix */
    321 	        indexInSrc;	/* row index of an encoding unit in the source
    322 				 * colume used for recovery */
    323 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    324 
    325 #if RF_EO_MATRIX_DIM > 17
    326 	int     shortsPerEU = bytesPerEU / sizeof(short);
    327 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    328 	short temp1;
    329 #elif RF_EO_MATRIX_DIM == 17
    330 	int     longsPerEU = bytesPerEU / sizeof(long);
    331 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    332 	long temp1;
    333 #endif
    334 
    335 #if RF_EO_MATRIX_DIM > 17
    336 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    337 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    338 #elif RF_EO_MATRIX_DIM == 17
    339 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    340 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    341 #endif
    342 
    343 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    344 #if RF_EO_MATRIX_DIM > 17
    345 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    346 #elif RF_EO_MATRIX_DIM == 17
    347 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    348 #endif
    349 
    350 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    351 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    352 
    353 #if RF_EO_MATRIX_DIM > 17
    354 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    355 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    356 		for (j = 0; j < shortsPerEU; j++) {
    357 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    358 			/* note: S_index won't be at the end row for any src
    359 			 * col! */
    360 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    361 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    362 			/* if indexInSrc is at the end row, ie.
    363 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    364 			else
    365 				destShortBuf[j] = temp1;
    366 		}
    367 
    368 #elif RF_EO_MATRIX_DIM == 17
    369 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    370 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    371 		for (j = 0; j < longsPerEU; j++) {
    372 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    373 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    374 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    375 			else
    376 				destLongBuf[j] = temp1;
    377 		}
    378 #endif
    379 	}
    380 }
    381 
    382 void
    383 rf_e_encToBuf(
    384     RF_Raid_t * raidPtr,
    385     RF_RowCol_t srcLogicCol,
    386     char *srcbuf,
    387     RF_RowCol_t destLogicCol,
    388     char *destbuf,
    389     int numSector)
    390 {
    391 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    392 
    393 	for (i = 0; i < numSector; i++) {
    394 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    395 		srcbuf += bytesPerSector;
    396 		destbuf += bytesPerSector;
    397 	}
    398 }
    399 /**************************************************************************************
    400  * When the parity disk and one data disk have died, we use the second redundant
    401  * information, 'E', to recover the data of the dead disk. This function is used in
    402  * the recovery node for EO_110_CreateReadDAG
    403  **************************************************************************************/
    404 int
    405 rf_RecoveryEFunc(node)
    406 	RF_DagNode_t *node;
    407 {
    408 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    409 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    410 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    411 	RF_RowCol_t scol,	/* source logical column */
    412 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    413 									 * failed SU */
    414 	int     i;
    415 	RF_PhysDiskAddr_t *pda;
    416 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    417 	char   *srcbuf, *destbuf;
    418 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    419 	RF_Etimer_t timer;
    420 
    421 	memset((char *) node->results[0], 0,
    422 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    423 	if (node->dagHdr->status == rf_enable) {
    424 		RF_ETIMER_START(timer);
    425 		for (i = 0; i < node->numParams - 2; i += 2)
    426 			if (node->params[i + 1].p != node->results[0]) {
    427 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    428 				if (i == node->numParams - 4)
    429 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    430 									 * redundant E */
    431 				else
    432 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    433 				srcbuf = (char *) node->params[i + 1].p;
    434 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    435 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    436 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    437 			}
    438 		RF_ETIMER_STOP(timer);
    439 		RF_ETIMER_EVAL(timer);
    440 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    441 	}
    442 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    443 }
    444 /**************************************************************************************
    445  * This function is used in the case where one data disk and the parity have failed.
    446  * (in EO_110_CreateWriteDAG )
    447  **************************************************************************************/
    448 int
    449 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    450 {
    451 	rf_DegrESubroutine(node, node->results[0]);
    452 	rf_GenericWakeupFunc(node, 0);
    453 #if 1
    454 	return (0);		/* XXX Yet another one!! GO */
    455 #endif
    456 }
    457 
    458 
    459 
    460 /**************************************************************************************
    461  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    462  **************************************************************************************/
    463 
    464 void
    465 rf_doubleEOdecode(
    466     RF_Raid_t * raidPtr,
    467     char **rrdbuf,
    468     char **dest,
    469     RF_RowCol_t * fcol,
    470     char *pbuf,
    471     char *ebuf)
    472 {
    473 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    474 	int     i, j, k, f1, f2, row;
    475 	int     rrdrow, erow, count = 0;
    476 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    477 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    478 #if 0
    479 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    480 #endif
    481 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    482 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    483 	int     numDataCol = layoutPtr->numDataCol;
    484 #if RF_EO_MATRIX_DIM > 17
    485 	int     shortsPerEU = bytesPerEU / sizeof(short);
    486 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    487 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    488 	short *temp;
    489 	short  *P;
    490 
    491 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    492 	RF_Malloc(P, bytesPerEU, (short *));
    493 	RF_Malloc(temp, bytesPerEU, (short *));
    494 #elif RF_EO_MATRIX_DIM == 17
    495 	int     longsPerEU = bytesPerEU / sizeof(long);
    496 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    497 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    498 	long *temp;
    499 	long   *P;
    500 
    501 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    502 	RF_Malloc(P, bytesPerEU, (long *));
    503 	RF_Malloc(temp, bytesPerEU, (long *));
    504 #endif
    505 	RF_ASSERT(*((long *) dest[0]) == 0);
    506 	RF_ASSERT(*((long *) dest[1]) == 0);
    507 	memset((char *) P, 0, bytesPerEU);
    508 	memset((char *) temp, 0, bytesPerEU);
    509 	RF_ASSERT(*P == 0);
    510 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    511 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    512 	 * the Ref. paper by Blaum, et al 1993  */
    513 	for (i = 0; i < numRowInEncMatix; i++)
    514 		for (k = 0; k < longsPerEU; k++) {
    515 #if RF_EO_MATRIX_DIM > 17
    516 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    517 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    518 #elif RF_EO_MATRIX_DIM == 17
    519 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    520 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    521 #endif
    522 			P[k] ^= *ebuf_current;
    523 			P[k] ^= *pbuf_current;
    524 		}
    525 	RF_ASSERT(fcol[0] != fcol[1]);
    526 	if (fcol[0] < fcol[1]) {
    527 #if RF_EO_MATRIX_DIM > 17
    528 		dest_smaller = (short *) (dest[0]);
    529 		dest_larger = (short *) (dest[1]);
    530 #elif RF_EO_MATRIX_DIM == 17
    531 		dest_smaller = (long *) (dest[0]);
    532 		dest_larger = (long *) (dest[1]);
    533 #endif
    534 		f1 = fcol[0];
    535 		f2 = fcol[1];
    536 	} else {
    537 #if RF_EO_MATRIX_DIM > 17
    538 		dest_smaller = (short *) (dest[1]);
    539 		dest_larger = (short *) (dest[0]);
    540 #elif RF_EO_MATRIX_DIM == 17
    541 		dest_smaller = (long *) (dest[1]);
    542 		dest_larger = (long *) (dest[0]);
    543 #endif
    544 		f1 = fcol[1];
    545 		f2 = fcol[0];
    546 	}
    547 	row = (RF_EO_MATRIX_DIM) - 1;
    548 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    549 #if RF_EO_MATRIX_DIM > 17
    550 		dest_larger_current = dest_larger + row * shortsPerEU;
    551 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    552 #elif RF_EO_MATRIX_DIM == 17
    553 		dest_larger_current = dest_larger + row * longsPerEU;
    554 		dest_smaller_current = dest_smaller + row * longsPerEU;
    555 #endif
    556 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    557 		       which is the failed data in the colume which has smaller col index. **/
    558 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    559 		for (j = 0; j < numDataCol; j++) {
    560 			if (j == f1 || j == f2)
    561 				continue;
    562 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    563 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    564 #if RF_EO_MATRIX_DIM > 17
    565 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    566 				for (k = 0; k < shortsPerEU; k++)
    567 					temp[k] ^= *(rrdbuf_current + k);
    568 #elif RF_EO_MATRIX_DIM == 17
    569 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    570 				for (k = 0; k < longsPerEU; k++)
    571 					temp[k] ^= *(rrdbuf_current + k);
    572 #endif
    573 			}
    574 		}
    575 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    576 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    577 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    578 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    579 		 * diagonal) ^ (failed 2)       */
    580 
    581 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    582 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    583 #if RF_EO_MATRIX_DIM > 17
    584 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    585 			for (k = 0; k < shortsPerEU; k++)
    586 				temp[k] ^= *(ebuf_current + k);
    587 #elif RF_EO_MATRIX_DIM == 17
    588 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    589 			for (k = 0; k < longsPerEU; k++)
    590 				temp[k] ^= *(ebuf_current + k);
    591 #endif
    592 		}
    593 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    594 		 * proved to be actually  (principle diagonal)  After this
    595 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    596 #if RF_EO_MATRIX_DIM > 17
    597 		for (k = 0; k < shortsPerEU; k++)
    598 			temp[k] ^= P[k];
    599 		/* Put the data to the destination buffer                              */
    600 		for (k = 0; k < shortsPerEU; k++)
    601 			dest_larger_current[k] = temp[k];
    602 #elif RF_EO_MATRIX_DIM == 17
    603 		for (k = 0; k < longsPerEU; k++)
    604 			temp[k] ^= P[k];
    605 		/* Put the data to the destination buffer                              */
    606 		for (k = 0; k < longsPerEU; k++)
    607 			dest_larger_current[k] = temp[k];
    608 #endif
    609 
    610 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    611 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    612 		 * columes    */
    613 		for (j = 0; j < numDataCol; j++) {
    614 			if (j == f1 || j == f2)
    615 				continue;
    616 #if RF_EO_MATRIX_DIM > 17
    617 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    618 			for (k = 0; k < shortsPerEU; k++)
    619 				temp[k] ^= *(rrdbuf_current + k);
    620 #elif RF_EO_MATRIX_DIM == 17
    621 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    622 			for (k = 0; k < longsPerEU; k++)
    623 				temp[k] ^= *(rrdbuf_current + k);
    624 #endif
    625 		}
    626 		/* step 2: ^A(row,m-1) */
    627 		/* step 3: Put the data to the destination buffer                             	 */
    628 #if RF_EO_MATRIX_DIM > 17
    629 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    630 		for (k = 0; k < shortsPerEU; k++)
    631 			temp[k] ^= *(pbuf_current + k);
    632 		for (k = 0; k < shortsPerEU; k++)
    633 			dest_smaller_current[k] = temp[k];
    634 #elif RF_EO_MATRIX_DIM == 17
    635 		pbuf_current = (long *) pbuf + longsPerEU * row;
    636 		for (k = 0; k < longsPerEU; k++)
    637 			temp[k] ^= *(pbuf_current + k);
    638 		for (k = 0; k < longsPerEU; k++)
    639 			dest_smaller_current[k] = temp[k];
    640 #endif
    641 		count++;
    642 	}
    643 	/* Check if all Encoding Unit in the data buffer have been decoded,
    644 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    645 	 * this algorithm will covered all buffer 				 */
    646 	RF_ASSERT(count == numRowInEncMatix);
    647 	RF_Free((char *) P, bytesPerEU);
    648 	RF_Free((char *) temp, bytesPerEU);
    649 }
    650 
    651 
    652 /***************************************************************************************
    653 * 	This function is called by double degraded read
    654 * 	EO_200_CreateReadDAG
    655 *
    656 ***************************************************************************************/
    657 int
    658 rf_EvenOddDoubleRecoveryFunc(node)
    659 	RF_DagNode_t *node;
    660 {
    661 	int     ndataParam = 0;
    662 	int     np = node->numParams;
    663 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    664 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    665 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    666 	int     i, prm, sector, nresults = node->numResults;
    667 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    668 	unsigned sosAddr;
    669 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    670 							 * memory is allocated */
    671 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    672 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    673 	        npda;
    674 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    675 	char  **buf, *ebuf, *pbuf, *dest[2];
    676 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL,
    677 	    psuoff = 0, esuoff = 0;
    678 	RF_SectorNum_t startSector, endSector;
    679 	RF_Etimer_t timer;
    680 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    681 
    682 	RF_ETIMER_START(timer);
    683 
    684 	/* Find out the number of parameters which are pdas for data
    685 	 * information */
    686 	for (i = 0; i <= np; i++)
    687 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    688 			ndataParam = i;
    689 			break;
    690 		}
    691 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    692 	if (ndataParam != 0) {
    693 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
    694 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
    695 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
    696 	}
    697 	if (asmap->failedPDAs[1] &&
    698 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    699 		RF_ASSERT(0);	/* currently, no support for this situation */
    700 		ppda = node->params[np - 6].p;
    701 		ppda2 = node->params[np - 5].p;
    702 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    703 		epda = node->params[np - 4].p;
    704 		epda2 = node->params[np - 3].p;
    705 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    706 		two = 1;
    707 	} else {
    708 		ppda = node->params[np - 4].p;
    709 		epda = node->params[np - 3].p;
    710 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    711 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    712 		RF_ASSERT(psuoff == esuoff);
    713 	}
    714 	/*
    715             the followings have three goals:
    716             1. determine the startSector to begin decoding and endSector to end decoding.
    717             2. determine the colume numbers of the two failed disks.
    718             3. determine the offset and end offset of the access within each failed stripe unit.
    719          */
    720 	if (nresults == 1) {
    721 		/* find the startSector to begin decoding */
    722 		pda = node->results[0];
    723 		memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
    724 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    725 		fsuend[0] = fsuoff[0] + pda->numSector;
    726 		fsuoff[1] = 0;
    727 		fsuend[1] = 0;
    728 		startSector = fsuoff[0];
    729 		endSector = fsuend[0];
    730 
    731 		/* find out the column of failed disk being accessed */
    732 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    733 
    734 		/* find out the other failed colume not accessed */
    735 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    736 		for (i = 0; i < numDataCol; i++) {
    737 			npda.raidAddress = sosAddr + (i * secPerSU);
    738 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
    739 			/* skip over dead disks */
    740 			if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
    741 				if (i != fcol[0])
    742 					break;
    743 		}
    744 		RF_ASSERT(i < numDataCol);
    745 		fcol[1] = i;
    746 	} else {
    747 		RF_ASSERT(nresults == 2);
    748 		pda0 = node->results[0];
    749 		memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
    750 		pda1 = node->results[1];
    751 		memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
    752 		/* determine the failed colume numbers of the two failed
    753 		 * disks. */
    754 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    755 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    756 		/* determine the offset and end offset of the access within
    757 		 * each failed stripe unit. */
    758 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    759 		fsuend[0] = fsuoff[0] + pda0->numSector;
    760 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    761 		fsuend[1] = fsuoff[1] + pda1->numSector;
    762 		/* determine the startSector to begin decoding */
    763 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    764 		/* determine the endSector to end decoding */
    765 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    766 	}
    767 	/*
    768 	      assign the beginning sector and the end sector for each parameter
    769 	      find out the corresponding colume # for each parameter
    770         */
    771 	for (prm = 0; prm < ndataParam; prm++) {
    772 		pda = node->params[prm].p;
    773 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    774 		suend[prm] = suoff[prm] + pda->numSector;
    775 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    776 	}
    777 	/* 'sector' is the sector for the current decoding algorithm. For each
    778 	 * sector in the failed SU, find out the corresponding parameters that
    779 	 * cover the current sector and that are needed for decoding of this
    780 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    781 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    782 	 * size. */
    783 	for (sector = startSector; sector < endSector; sector++) {
    784 		if (nresults == 2)
    785 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    786 				continue;
    787 		for (prm = 0; prm < ndataParam; prm++)
    788 			if (suoff[prm] <= sector && sector < suend[prm])
    789 				buf[(prmToCol[prm])] = (char *)((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    790 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    791 		/* find out if sector is in the shadow of any accessed failed
    792 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    793 		 * position of the buffer corresponding to failed SUs. if no,
    794 		 * malloc a temporary space of a sector in size for
    795 		 * destination of decoding. */
    796 		RF_ASSERT(nresults == 1 || nresults == 2);
    797 		if (nresults == 1) {
    798 			dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    799 			/* Always malloc temp buffer to dest[1]  */
    800 			RF_Malloc(dest[1], bytesPerSector, (char *));
    801 			memset(dest[1], 0, bytesPerSector);
    802 			mallc_two = 1;
    803 		} else {
    804 			if (fsuoff[0] <= sector && sector < fsuend[0])
    805 				dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    806 			else {
    807 				RF_Malloc(dest[0], bytesPerSector, (char *));
    808 				memset(dest[0], 0, bytesPerSector);
    809 				mallc_one = 1;
    810 			}
    811 			if (fsuoff[1] <= sector && sector < fsuend[1])
    812 				dest[1] = (char *)((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    813 			else {
    814 				RF_Malloc(dest[1], bytesPerSector, (char *));
    815 				memset(dest[1], 0, bytesPerSector);
    816 				mallc_two = 1;
    817 			}
    818 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    819 		}
    820 		pbuf = (char *)ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    821 		ebuf = (char *)epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    822 		/*
    823 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    824 	         * one sector to destination.
    825 	         */
    826 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    827 		/* free all allocated memory, and mark flag to indicate no
    828 		 * memory is being allocated */
    829 		if (mallc_one == 1)
    830 			RF_Free(dest[0], bytesPerSector);
    831 		if (mallc_two == 1)
    832 			RF_Free(dest[1], bytesPerSector);
    833 		mallc_one = mallc_two = 0;
    834 	}
    835 	RF_Free(buf, numDataCol * sizeof(char *));
    836 	if (ndataParam != 0) {
    837 		RF_Free(suoff, ndataParam * sizeof(long));
    838 		RF_Free(suend, ndataParam * sizeof(long));
    839 		RF_Free(prmToCol, ndataParam * sizeof(long));
    840 	}
    841 	RF_ETIMER_STOP(timer);
    842 	RF_ETIMER_EVAL(timer);
    843 	if (tracerec) {
    844 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    845 	}
    846 	rf_GenericWakeupFunc(node, 0);
    847 #if 1
    848 	return (0);		/* XXX is this even close!!?!?!!? GO */
    849 #endif
    850 }
    851 
    852 
    853 /* currently, only access of one of the two failed SU is allowed in this function.
    854  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    855  * many accesses of single stripe unit.
    856  */
    857 
    858 int
    859 rf_EOWriteDoubleRecoveryFunc(node)
    860 	RF_DagNode_t *node;
    861 {
    862 	int     np = node->numParams;
    863 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    864 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    865 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    866 	RF_SectorNum_t sector;
    867 	RF_RowCol_t col, scol;
    868 	int     prm, i, j;
    869 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    870 	unsigned sosAddr;
    871 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    872 	RF_int64 numbytes;
    873 	RF_SectorNum_t startSector, endSector;
    874 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    875 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    876 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    877 				 * buffer storing data read from col0, col1,
    878 				 * col2 */
    879 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    880 	RF_Etimer_t timer;
    881 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    882 
    883 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    884 						 * case, the other failed SU
    885 						 * is not being accessed */
    886 	RF_ETIMER_START(timer);
    887 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    888 
    889 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    890 				 * and [1] are Ppda and Epda  */
    891 	epda = node->results[1];
    892 	fpda = asmap->failedPDAs[0];
    893 
    894 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    895 	/* determine the startSector and endSector for decoding */
    896 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    897 	endSector = startSector + fpda->numSector;
    898 	/* Assign buf[col] pointers to point to each non-failed colume  and
    899 	 * initialize the pbuf and ebuf to point at the beginning of each
    900 	 * source buffers and destination buffers */
    901 	for (prm = 0; prm < numDataCol - 2; prm++) {
    902 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    903 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    904 		buf[col] = pda->bufPtr;
    905 	}
    906 	/* pbuf and ebuf:  they will change values as double recovery decoding
    907 	 * goes on */
    908 	pbuf = ppda->bufPtr;
    909 	ebuf = epda->bufPtr;
    910 	/* find out the logical colume numbers in the encoding matrix of the
    911 	 * two failed columes */
    912 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    913 
    914 	/* find out the other failed colume not accessed this time */
    915 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    916 	for (i = 0; i < numDataCol; i++) {
    917 		npda.raidAddress = sosAddr + (i * secPerSU);
    918 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
    919 		/* skip over dead disks */
    920 		if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
    921 			if (i != fcol[0])
    922 				break;
    923 	}
    924 	RF_ASSERT(i < numDataCol);
    925 	fcol[1] = i;
    926 	/* assign temporary space to put recovered failed SU */
    927 	numbytes = fpda->numSector * bytesPerSector;
    928 	RF_Malloc(olddata[0], numbytes, (char *));
    929 	RF_Malloc(olddata[1], numbytes, (char *));
    930 	dest[0] = olddata[0];
    931 	dest[1] = olddata[1];
    932 	memset(olddata[0], 0, numbytes);
    933 	memset(olddata[1], 0, numbytes);
    934 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    935 	 * have already pointed at the beginning of each source buffers and
    936 	 * destination buffers */
    937 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    938 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    939 		for (j = 0; j < numDataCol; j++)
    940 			if ((j != fcol[0]) && (j != fcol[1]))
    941 				buf[j] += bytesPerSector;
    942 		dest[0] += bytesPerSector;
    943 		dest[1] += bytesPerSector;
    944 		ebuf += bytesPerSector;
    945 		pbuf += bytesPerSector;
    946 	}
    947 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    948 	 * data. With new writing data and this old data, use small write to
    949 	 * calculate the new redundant informations */
    950 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    951 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    952 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    953 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    954 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    955 	 * wudNodes; For current implementation, we assume the simplest case:
    956 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    957 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    958 	 * data to be writen to the failed disk. We first bxor the new data
    959 	 * into the old recovered data, then do the same things as small
    960 	 * write. */
    961 
    962 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes);
    963 	/* do new 'E' calculation  */
    964 	/* find out the corresponding colume in encoding matrix for write
    965 	 * colume to be encoded into redundant disk 'E' */
    966 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    967 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    968 	 * buffer pointer               */
    969 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    970 
    971 	/* do new 'P' calculation  */
    972 	rf_bxor(olddata[0], ppda->bufPtr, numbytes);
    973 	/* Free the allocated buffer  */
    974 	RF_Free(olddata[0], numbytes);
    975 	RF_Free(olddata[1], numbytes);
    976 	RF_Free(buf, numDataCol * sizeof(char *));
    977 
    978 	RF_ETIMER_STOP(timer);
    979 	RF_ETIMER_EVAL(timer);
    980 	if (tracerec) {
    981 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    982 	}
    983 	rf_GenericWakeupFunc(node, 0);
    984 	return (0);
    985 }
    986 #endif				/* RF_INCLUDE_EVENODD > 0 */
    987