Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.23
      1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.23 2019/02/09 03:34:00 christos Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: ChangMing Wu
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID-EVENODD  architecture.
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.23 2019/02/09 03:34:00 christos Exp $");
     35 
     36 #include "rf_archs.h"
     37 
     38 #ifdef _KERNEL_OPT
     39 #include "opt_raid_diagnostic.h"
     40 #endif
     41 
     42 #if RF_INCLUDE_EVENODD > 0
     43 
     44 #include <dev/raidframe/raidframevar.h>
     45 
     46 #include "rf_raid.h"
     47 #include "rf_dag.h"
     48 #include "rf_dagffrd.h"
     49 #include "rf_dagffwr.h"
     50 #include "rf_dagdegrd.h"
     51 #include "rf_dagdegwr.h"
     52 #include "rf_dagutils.h"
     53 #include "rf_dagfuncs.h"
     54 #include "rf_etimer.h"
     55 #include "rf_general.h"
     56 #include "rf_parityscan.h"
     57 #include "rf_evenodd.h"
     58 #include "rf_evenodd_dagfuncs.h"
     59 
     60 /* These redundant functions are for small write */
     61 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     62 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     63 /* These redundant functions are for degraded read */
     64 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     65 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
/**********************************************************************************************
 *   the following encoding node functions are used in EO_000_CreateLargeWriteDAG
 **********************************************************************************************/
     69 int
     70 rf_RegularPEFunc(RF_DagNode_t *node)
     71 {
     72 	rf_RegularESubroutine(node, node->results[1]);
     73 	rf_RegularXorFunc(node);/* does the wakeup here! */
     74 #if 1
     75 	return (0);		/* XXX This was missing... GO */
     76 #endif
     77 }
     78 
     79 
/************************************************************************************************
 *  For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and (ii) SimpleONEFunc() to
 *  be used. The former is for a write that accesses at least a full stripe unit's worth of
 *  sectors. The latter is used when the write accesses two stripe units but with total sectors
 *  fewer than the sectors per SU. In that case, the accesses to parity and 'E' appear as
 *  disconnected areas in their stripe units, and the parity write and the 'E' write are each
 *  divided into two distinct writes (four in total). The simple old-new write and the regular
 *  old-new write happen as in RAID-5.
 ************************************************************************************************/
     88 
/* Algorithm:
     1. Store the difference of old data and new data in the Rod buffer.
     2. Then encode this buffer into the buffer which already has the old 'E' information in it;
	the result can be shown to be the new 'E' information.
     3. Xor the Wnd buffer into the difference buffer to recover the original old data.
   Here we have another alternative: allocate a temporary buffer for storing the difference of
   old data and new data, then encode the temp buf into the old 'E' buf to form the new 'E'. That
   approach runs at the same speed as the one above, but needs more memory.
*/
     98 int
     99 rf_RegularONEFunc(RF_DagNode_t *node)
    100 {
    101 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    102 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    103 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
    104 								 * where you can find
    105 								 * e-pda */
    106 	int     i, k;
    107 	int     suoffset, length;
    108 	RF_RowCol_t scol;
    109 	char   *srcbuf, *destbuf;
    110 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    111 	RF_Etimer_t timer;
    112 	RF_PhysDiskAddr_t *pda;
    113 #ifdef RAID_DIAGNOSTIC
    114 	RF_PhysDiskAddr_t *EPDA =
    115 	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    116 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
    117 
    118 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    119 	RF_ASSERT(ESUOffset == 0);
    120 #endif /* RAID_DIAGNOSTIC */
    121 
    122 	RF_ETIMER_START(timer);
    123 
    124 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    125 	 * new data is stored in Rod buffer */
    126 	for (k = 0; k < EpdaIndex; k += 2) {
    127 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    128 		rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
    129 	}
    130 	/* Start to encoding the buffer storing the difference of old data and
    131 	 * new data into 'E' buffer  */
    132 	for (i = 0; i < EpdaIndex; i += 2)
    133 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    134 									 * of E */
    135 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    136 			srcbuf = (char *) node->params[i + 1].p;
    137 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    138 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    139 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    140 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    141 		}
    142 	/* Recover the original old data to be used by parity encoding
    143 	 * function in XorNode */
    144 	for (k = 0; k < EpdaIndex; k += 2) {
    145 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    146 		rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
    147 	}
    148 	RF_ETIMER_STOP(timer);
    149 	RF_ETIMER_EVAL(timer);
    150 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    151 	rf_GenericWakeupFunc(node, 0);
    152 #if 1
    153 	return (0);		/* XXX this was missing.. GO */
    154 #endif
    155 }
    156 
    157 int
    158 rf_SimpleONEFunc(RF_DagNode_t *node)
    159 {
    160 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    161 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    162 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    163 	int     retcode = 0;
    164 	char   *srcbuf, *destbuf;
    165 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    166 	int     length;
    167 	RF_RowCol_t scol;
    168 	RF_Etimer_t timer;
    169 
    170 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    171 	if (node->dagHdr->status == rf_enable) {
    172 		RF_ETIMER_START(timer);
    173 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    174 														 * writeDataNodes */
    175 		/* bxor to buffer of readDataNodes */
    176 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length);
    177 		/* find out the corresponding colume in encoding matrix for
    178 		 * write colume to be encoded into redundant disk 'E' */
    179 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    180 		srcbuf = node->params[1].p;
    181 		destbuf = node->params[3].p;
    182 		/* Start encoding process */
    183 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    184 		rf_bxor(node->params[5].p, node->params[1].p, length);
    185 		RF_ETIMER_STOP(timer);
    186 		RF_ETIMER_EVAL(timer);
    187 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    188 
    189 	}
    190 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    191 							 * explicitly since no
    192 							 * I/O in this node */
    193 }
    194 
    195 
    196 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    197 void
    198 rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf)
    199 {
    200 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    201 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    202 	RF_PhysDiskAddr_t *pda;
    203 	int     i, suoffset;
    204 	RF_RowCol_t scol;
    205 	char   *srcbuf, *destbuf;
    206 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    207 	RF_Etimer_t timer;
    208 
    209 	RF_ETIMER_START(timer);
    210 	for (i = 0; i < node->numParams - 2; i += 2) {
    211 		RF_ASSERT(node->params[i + 1].p != ebuf);
    212 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    213 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    214 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    215 		srcbuf = (char *) node->params[i + 1].p;
    216 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    217 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    218 	}
    219 	RF_ETIMER_STOP(timer);
    220 	RF_ETIMER_EVAL(timer);
    221 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    222 }
    223 
    224 
    225 /*******************************************************************************************
    226  *			 Used in  EO_001_CreateLargeWriteDAG
    227  ******************************************************************************************/
    228 int
    229 rf_RegularEFunc(RF_DagNode_t *node)
    230 {
    231 	rf_RegularESubroutine(node, node->results[0]);
    232 	rf_GenericWakeupFunc(node, 0);
    233 #if 1
    234 	return (0);		/* XXX this was missing?.. GO */
    235 #endif
    236 }
/*******************************************************************************************
 * This degraded function allows only two cases:
 *  1. when the write accesses the full failed stripe unit; then the access can span more
 *     than one stripe unit.
 *  2. when the write accesses only part of the failed SU; we assume accesses of more than
 *     one stripe unit are not allowed, so that the write can be dealt with like a
 *     large write.
 *  The following function is based on these assumptions. So except in the second case,
 *  it looks the same as a large write encoding function. But this is not exactly the
 *  normal way of doing a degraded write, since raidframe has to break accesses other
 *  than the above two cases into smaller accesses. We may have to change
 *  DegrESubroutine in the future.
 *******************************************************************************************/
    250 void
    251 rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
    252 {
    253 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    254 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    255 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    256 	RF_PhysDiskAddr_t *pda;
    257 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    258 	RF_RowCol_t scol;
    259 	char   *srcbuf, *destbuf;
    260 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    261 	RF_Etimer_t timer;
    262 
    263 	RF_ETIMER_START(timer);
    264 	for (i = 0; i < node->numParams - 2; i += 2) {
    265 		RF_ASSERT(node->params[i + 1].p != ebuf);
    266 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    267 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    268 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    269 		srcbuf = (char *) node->params[i + 1].p;
    270 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    271 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    272 	}
    273 
    274 	RF_ETIMER_STOP(timer);
    275 	RF_ETIMER_EVAL(timer);
    276 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    277 }
    278 
    279 
    280 /**************************************************************************************
    281  * This function is used in case where one data disk failed and both redundant disks
    282  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    283  * failed in the stripe but not accessed at this time, then we should, instead, use
    284  * the rf_EOWriteDoubleRecoveryFunc().
    285  **************************************************************************************/
    286 int
    287 rf_Degraded_100_EOFunc(RF_DagNode_t *node)
    288 {
    289 	rf_DegrESubroutine(node, node->results[1]);
    290 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    291 #if 1
    292 	return (0);		/* XXX this was missing... SHould these be
    293 				 * void functions??? GO */
    294 #endif
    295 }
    296 /**************************************************************************************
    297  * This function is to encode one sector in one of the data disks to the E disk.
    298  * However, in evenodd this function can also be used as decoding function to recover
    299  * data from dead disk in the case of parity failure and a single data failure.
    300  **************************************************************************************/
    301 void
    302 rf_e_EncOneSect(
    303     RF_RowCol_t srcLogicCol,
    304     char *srcSecbuf,
    305     RF_RowCol_t destLogicCol,
    306     char *destSecbuf,
    307     int bytesPerSector)
    308 {
    309 	int     S_index;	/* index of the EU in the src col which need
    310 				 * be Xored into all EUs in a dest sector */
    311 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    312 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    313 					 * the destination colume of encoding
    314 					 * matrix */
    315 	        indexInSrc;	/* row index of an encoding unit in the source
    316 				 * colume used for recovery */
    317 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    318 
    319 #if RF_EO_MATRIX_DIM > 17
    320 	int     shortsPerEU = bytesPerEU / sizeof(short);
    321 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    322 	short temp1;
    323 #elif RF_EO_MATRIX_DIM == 17
    324 	int     longsPerEU = bytesPerEU / sizeof(long);
    325 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    326 	long temp1;
    327 #endif
    328 
    329 #if RF_EO_MATRIX_DIM > 17
    330 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    331 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    332 #elif RF_EO_MATRIX_DIM == 17
    333 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    334 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    335 #endif
    336 
    337 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    338 #if RF_EO_MATRIX_DIM > 17
    339 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    340 #elif RF_EO_MATRIX_DIM == 17
    341 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    342 #endif
    343 
    344 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    345 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    346 
    347 #if RF_EO_MATRIX_DIM > 17
    348 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    349 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    350 		for (j = 0; j < shortsPerEU; j++) {
    351 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    352 			/* note: S_index won't be at the end row for any src
    353 			 * col! */
    354 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    355 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    356 			/* if indexInSrc is at the end row, ie.
    357 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    358 			else
    359 				destShortBuf[j] = temp1;
    360 		}
    361 
    362 #elif RF_EO_MATRIX_DIM == 17
    363 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    364 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    365 		for (j = 0; j < longsPerEU; j++) {
    366 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    367 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    368 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    369 			else
    370 				destLongBuf[j] = temp1;
    371 		}
    372 #endif
    373 	}
    374 }
    375 
    376 void
    377 rf_e_encToBuf(
    378     RF_Raid_t * raidPtr,
    379     RF_RowCol_t srcLogicCol,
    380     char *srcbuf,
    381     RF_RowCol_t destLogicCol,
    382     char *destbuf,
    383     int numSector)
    384 {
    385 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    386 
    387 	for (i = 0; i < numSector; i++) {
    388 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    389 		srcbuf += bytesPerSector;
    390 		destbuf += bytesPerSector;
    391 	}
    392 }
/**************************************************************************************
 * When parity and one data disk have died, we use the second redundant information, 'E',
 * to recover the data on the dead disk. This function is used in the recovery node
 * for EO_110_CreateReadDAG.
 **************************************************************************************/
    398 int
    399 rf_RecoveryEFunc(RF_DagNode_t *node)
    400 {
    401 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    402 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    403 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    404 	RF_RowCol_t scol,	/* source logical column */
    405 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    406 									 * failed SU */
    407 	int     i;
    408 	RF_PhysDiskAddr_t *pda;
    409 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    410 	char   *srcbuf, *destbuf;
    411 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    412 	RF_Etimer_t timer;
    413 
    414 	memset(node->results[0], 0,
    415 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    416 	if (node->dagHdr->status == rf_enable) {
    417 		RF_ETIMER_START(timer);
    418 		for (i = 0; i < node->numParams - 2; i += 2)
    419 			if (node->params[i + 1].p != node->results[0]) {
    420 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    421 				if (i == node->numParams - 4)
    422 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    423 									 * redundant E */
    424 				else
    425 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    426 				srcbuf = (char *) node->params[i + 1].p;
    427 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    428 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    429 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    430 			}
    431 		RF_ETIMER_STOP(timer);
    432 		RF_ETIMER_EVAL(timer);
    433 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    434 	}
    435 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    436 }
/**************************************************************************************
 * This function is used in the case where one data disk and the parity have failed.
 * (in EO_110_CreateWriteDAG )
 **************************************************************************************/
    441 int
    442 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    443 {
    444 	rf_DegrESubroutine(node, node->results[0]);
    445 	rf_GenericWakeupFunc(node, 0);
    446 #if 1
    447 	return (0);		/* XXX Yet another one!! GO */
    448 #endif
    449 }
    450 
    451 
    452 
    453 /**************************************************************************************
    454  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    455  **************************************************************************************/
    456 
    457 void
    458 rf_doubleEOdecode(
    459     RF_Raid_t * raidPtr,
    460     char **rrdbuf,
    461     char **dest,
    462     RF_RowCol_t * fcol,
    463     char *pbuf,
    464     char *ebuf)
    465 {
    466 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    467 	int     i, j, k, f1, f2, row;
    468 	int     rrdrow, erow, count = 0;
    469 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    470 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    471 #if 0
    472 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    473 #endif
    474 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    475 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    476 	int     numDataCol = layoutPtr->numDataCol;
    477 #if RF_EO_MATRIX_DIM > 17
    478 	int     shortsPerEU = bytesPerEU / sizeof(short);
    479 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    480 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    481 	short *temp;
    482 	short  *P;
    483 
    484 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    485 #elif RF_EO_MATRIX_DIM == 17
    486 	int     longsPerEU = bytesPerEU / sizeof(long);
    487 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    488 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    489 	long *temp;
    490 	long   *P;
    491 
    492 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    493 #endif
    494 	P = RF_Malloc(bytesPerEU);
    495 	temp = RF_Malloc(bytesPerEU);
    496 	RF_ASSERT(*((long *) dest[0]) == 0);
    497 	RF_ASSERT(*((long *) dest[1]) == 0);
    498 	RF_ASSERT(*P == 0);
    499 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    500 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    501 	 * the Ref. paper by Blaum, et al 1993  */
    502 	for (i = 0; i < numRowInEncMatix; i++)
    503 		for (k = 0; k < longsPerEU; k++) {
    504 #if RF_EO_MATRIX_DIM > 17
    505 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    506 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    507 #elif RF_EO_MATRIX_DIM == 17
    508 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    509 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    510 #endif
    511 			P[k] ^= *ebuf_current;
    512 			P[k] ^= *pbuf_current;
    513 		}
    514 	RF_ASSERT(fcol[0] != fcol[1]);
    515 	if (fcol[0] < fcol[1]) {
    516 #if RF_EO_MATRIX_DIM > 17
    517 		dest_smaller = (short *) (dest[0]);
    518 		dest_larger = (short *) (dest[1]);
    519 #elif RF_EO_MATRIX_DIM == 17
    520 		dest_smaller = (long *) (dest[0]);
    521 		dest_larger = (long *) (dest[1]);
    522 #endif
    523 		f1 = fcol[0];
    524 		f2 = fcol[1];
    525 	} else {
    526 #if RF_EO_MATRIX_DIM > 17
    527 		dest_smaller = (short *) (dest[1]);
    528 		dest_larger = (short *) (dest[0]);
    529 #elif RF_EO_MATRIX_DIM == 17
    530 		dest_smaller = (long *) (dest[1]);
    531 		dest_larger = (long *) (dest[0]);
    532 #endif
    533 		f1 = fcol[1];
    534 		f2 = fcol[0];
    535 	}
    536 	row = (RF_EO_MATRIX_DIM) - 1;
    537 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    538 #if RF_EO_MATRIX_DIM > 17
    539 		dest_larger_current = dest_larger + row * shortsPerEU;
    540 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    541 #elif RF_EO_MATRIX_DIM == 17
    542 		dest_larger_current = dest_larger + row * longsPerEU;
    543 		dest_smaller_current = dest_smaller + row * longsPerEU;
    544 #endif
    545 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    546 		       which is the failed data in the colume which has smaller col index. **/
    547 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    548 		for (j = 0; j < numDataCol; j++) {
    549 			if (j == f1 || j == f2)
    550 				continue;
    551 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    552 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    553 #if RF_EO_MATRIX_DIM > 17
    554 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    555 				for (k = 0; k < shortsPerEU; k++)
    556 					temp[k] ^= *(rrdbuf_current + k);
    557 #elif RF_EO_MATRIX_DIM == 17
    558 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    559 				for (k = 0; k < longsPerEU; k++)
    560 					temp[k] ^= *(rrdbuf_current + k);
    561 #endif
    562 			}
    563 		}
    564 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    565 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    566 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    567 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    568 		 * diagonal) ^ (failed 2)       */
    569 
    570 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    571 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    572 #if RF_EO_MATRIX_DIM > 17
    573 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    574 			for (k = 0; k < shortsPerEU; k++)
    575 				temp[k] ^= *(ebuf_current + k);
    576 #elif RF_EO_MATRIX_DIM == 17
    577 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    578 			for (k = 0; k < longsPerEU; k++)
    579 				temp[k] ^= *(ebuf_current + k);
    580 #endif
    581 		}
    582 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    583 		 * proved to be actually  (principle diagonal)  After this
    584 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    585 #if RF_EO_MATRIX_DIM > 17
    586 		for (k = 0; k < shortsPerEU; k++)
    587 			temp[k] ^= P[k];
    588 		/* Put the data to the destination buffer                              */
    589 		for (k = 0; k < shortsPerEU; k++)
    590 			dest_larger_current[k] = temp[k];
    591 #elif RF_EO_MATRIX_DIM == 17
    592 		for (k = 0; k < longsPerEU; k++)
    593 			temp[k] ^= P[k];
    594 		/* Put the data to the destination buffer                              */
    595 		for (k = 0; k < longsPerEU; k++)
    596 			dest_larger_current[k] = temp[k];
    597 #endif
    598 
    599 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    600 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    601 		 * columes    */
    602 		for (j = 0; j < numDataCol; j++) {
    603 			if (j == f1 || j == f2)
    604 				continue;
    605 #if RF_EO_MATRIX_DIM > 17
    606 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    607 			for (k = 0; k < shortsPerEU; k++)
    608 				temp[k] ^= *(rrdbuf_current + k);
    609 #elif RF_EO_MATRIX_DIM == 17
    610 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    611 			for (k = 0; k < longsPerEU; k++)
    612 				temp[k] ^= *(rrdbuf_current + k);
    613 #endif
    614 		}
    615 		/* step 2: ^A(row,m-1) */
    616 		/* step 3: Put the data to the destination buffer                             	 */
    617 #if RF_EO_MATRIX_DIM > 17
    618 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    619 		for (k = 0; k < shortsPerEU; k++)
    620 			temp[k] ^= *(pbuf_current + k);
    621 		for (k = 0; k < shortsPerEU; k++)
    622 			dest_smaller_current[k] = temp[k];
    623 #elif RF_EO_MATRIX_DIM == 17
    624 		pbuf_current = (long *) pbuf + longsPerEU * row;
    625 		for (k = 0; k < longsPerEU; k++)
    626 			temp[k] ^= *(pbuf_current + k);
    627 		for (k = 0; k < longsPerEU; k++)
    628 			dest_smaller_current[k] = temp[k];
    629 #endif
    630 		count++;
    631 	}
    632 	/* Check if all Encoding Unit in the data buffer have been decoded,
    633 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    634 	 * this algorithm will covered all buffer 				 */
    635 	RF_ASSERT(count == numRowInEncMatix);
    636 	RF_Free((char *) P, bytesPerEU);
    637 	RF_Free((char *) temp, bytesPerEU);
    638 }
    639 
    640 
/***************************************************************************************
* 	This function is called by double degraded read
* 	EO_200_CreateReadDAG
*
***************************************************************************************/
    646 int
    647 rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node)
    648 {
    649 	int     ndataParam = 0;
    650 	int     np = node->numParams;
    651 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    652 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    653 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    654 	int     i, prm, sector, nresults = node->numResults;
    655 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    656 	unsigned sosAddr;
    657 	int     mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    658 						 * memory is allocated */
    659 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    660 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    661 	        npda;
    662 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    663 	char  **buf, *ebuf, *pbuf, *dest[2];
    664 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL,
    665 	    psuoff = 0, esuoff = 0;
    666 	RF_SectorNum_t startSector, endSector;
    667 	RF_Etimer_t timer;
    668 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    669 
    670 	RF_ETIMER_START(timer);
    671 
    672 	/* Find out the number of parameters which are pdas for data
    673 	 * information */
    674 	for (i = 0; i <= np; i++)
    675 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    676 			ndataParam = i;
    677 			break;
    678 		}
    679 	buf = RF_Malloc(numDataCol * sizeof(*buf));
    680 	if (ndataParam != 0) {
    681 		suoff = RF_Malloc(ndataParam * sizeof(*suoff));
    682 		suend = RF_Malloc(ndataParam * sizeof(*suend));
    683 		prmToCol = RF_Malloc(ndataParam * sizeof(*prmToCol));
    684 	}
    685 	if (asmap->failedPDAs[1] &&
    686 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    687 		RF_ASSERT(0);	/* currently, no support for this situation */
    688 		ppda = node->params[np - 6].p;
    689 		ppda2 = node->params[np - 5].p;
    690 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    691 		epda = node->params[np - 4].p;
    692 		epda2 = node->params[np - 3].p;
    693 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    694 	} else {
    695 		ppda = node->params[np - 4].p;
    696 		epda = node->params[np - 3].p;
    697 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    698 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    699 		RF_ASSERT(psuoff == esuoff);
    700 	}
    701 	/*
    702             the followings have three goals:
    703             1. determine the startSector to begin decoding and endSector to end decoding.
    704             2. determine the colume numbers of the two failed disks.
    705             3. determine the offset and end offset of the access within each failed stripe unit.
    706          */
    707 	if (nresults == 1) {
    708 		/* find the startSector to begin decoding */
    709 		pda = node->results[0];
    710 		memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
    711 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    712 		fsuend[0] = fsuoff[0] + pda->numSector;
    713 		fsuoff[1] = 0;
    714 		fsuend[1] = 0;
    715 		startSector = fsuoff[0];
    716 		endSector = fsuend[0];
    717 
    718 		/* find out the column of failed disk being accessed */
    719 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    720 
    721 		/* find out the other failed colume not accessed */
    722 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    723 		for (i = 0; i < numDataCol; i++) {
    724 			npda.raidAddress = sosAddr + (i * secPerSU);
    725 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
    726 			/* skip over dead disks */
    727 			if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
    728 				if (i != fcol[0])
    729 					break;
    730 		}
    731 		RF_ASSERT(i < numDataCol);
    732 		fcol[1] = i;
    733 	} else {
    734 		RF_ASSERT(nresults == 2);
    735 		pda0 = node->results[0];
    736 		memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
    737 		pda1 = node->results[1];
    738 		memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
    739 		/* determine the failed colume numbers of the two failed
    740 		 * disks. */
    741 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    742 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    743 		/* determine the offset and end offset of the access within
    744 		 * each failed stripe unit. */
    745 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    746 		fsuend[0] = fsuoff[0] + pda0->numSector;
    747 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    748 		fsuend[1] = fsuoff[1] + pda1->numSector;
    749 		/* determine the startSector to begin decoding */
    750 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    751 		/* determine the endSector to end decoding */
    752 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    753 	}
    754 	/*
    755 	      assign the beginning sector and the end sector for each parameter
    756 	      find out the corresponding colume # for each parameter
    757         */
    758 	for (prm = 0; prm < ndataParam; prm++) {
    759 		pda = node->params[prm].p;
    760 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    761 		suend[prm] = suoff[prm] + pda->numSector;
    762 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    763 	}
    764 	/* 'sector' is the sector for the current decoding algorithm. For each
    765 	 * sector in the failed SU, find out the corresponding parameters that
    766 	 * cover the current sector and that are needed for decoding of this
    767 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    768 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    769 	 * size. */
    770 	for (sector = startSector; sector < endSector; sector++) {
    771 		if (nresults == 2)
    772 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    773 				continue;
    774 		for (prm = 0; prm < ndataParam; prm++)
    775 			if (suoff[prm] <= sector && sector < suend[prm])
    776 				buf[(prmToCol[prm])] = (char *)((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    777 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    778 		/* find out if sector is in the shadow of any accessed failed
    779 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    780 		 * position of the buffer corresponding to failed SUs. if no,
    781 		 * malloc a temporary space of a sector in size for
    782 		 * destination of decoding. */
    783 		RF_ASSERT(nresults == 1 || nresults == 2);
    784 		if (nresults == 1) {
    785 			dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    786 			/* Always malloc temp buffer to dest[1]  */
    787 			dest[1] = RF_Malloc(bytesPerSector);
    788 			mallc_two = 1;
    789 		} else {
    790 			if (fsuoff[0] <= sector && sector < fsuend[0])
    791 				dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    792 			else {
    793 				dest[0] = RF_Malloc(bytesPerSector);
    794 				mallc_one = 1;
    795 			}
    796 			if (fsuoff[1] <= sector && sector < fsuend[1])
    797 				dest[1] = (char *)((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    798 			else {
    799 				dest[1] = RF_Malloc(bytesPerSector);
    800 				mallc_two = 1;
    801 			}
    802 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    803 		}
    804 		pbuf = (char *)ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    805 		ebuf = (char *)epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    806 		/*
    807 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    808 	         * one sector to destination.
    809 	         */
    810 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    811 		/* free all allocated memory, and mark flag to indicate no
    812 		 * memory is being allocated */
    813 		if (mallc_one == 1)
    814 			RF_Free(dest[0], bytesPerSector);
    815 		if (mallc_two == 1)
    816 			RF_Free(dest[1], bytesPerSector);
    817 		mallc_one = mallc_two = 0;
    818 	}
    819 	RF_Free(buf, numDataCol * sizeof(char *));
    820 	if (ndataParam != 0) {
    821 		RF_Free(suoff, ndataParam * sizeof(long));
    822 		RF_Free(suend, ndataParam * sizeof(long));
    823 		RF_Free(prmToCol, ndataParam * sizeof(long));
    824 	}
    825 	RF_ETIMER_STOP(timer);
    826 	RF_ETIMER_EVAL(timer);
    827 	if (tracerec) {
    828 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    829 	}
    830 	rf_GenericWakeupFunc(node, 0);
    831 #if 1
    832 	return (0);		/* XXX is this even close!!?!?!!? GO */
    833 #endif
    834 }
    835 
    836 
    837 /* currently, only access of one of the two failed SU is allowed in this function.
    838  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    839  * many accesses of single stripe unit.
    840  */
    841 
    842 int
    843 rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node)
    844 {
    845 	int     np = node->numParams;
    846 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    847 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    848 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    849 	RF_SectorNum_t sector;
    850 	RF_RowCol_t col, scol;
    851 	int     prm, i, j;
    852 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    853 	unsigned sosAddr;
    854 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    855 	RF_int64 numbytes;
    856 	RF_SectorNum_t startSector, endSector;
    857 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    858 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    859 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    860 				 * buffer storing data read from col0, col1,
    861 				 * col2 */
    862 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    863 	RF_Etimer_t timer;
    864 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    865 
    866 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    867 						 * case, the other failed SU
    868 						 * is not being accessed */
    869 	RF_ETIMER_START(timer);
    870 	buf = RF_Malloc(numDataCol * sizeof(*buf));
    871 
    872 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    873 				 * and [1] are Ppda and Epda  */
    874 	epda = node->results[1];
    875 	fpda = asmap->failedPDAs[0];
    876 
    877 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    878 	/* determine the startSector and endSector for decoding */
    879 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    880 	endSector = startSector + fpda->numSector;
    881 	/* Assign buf[col] pointers to point to each non-failed colume  and
    882 	 * initialize the pbuf and ebuf to point at the beginning of each
    883 	 * source buffers and destination buffers */
    884 	for (prm = 0; prm < numDataCol - 2; prm++) {
    885 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    886 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    887 		buf[col] = pda->bufPtr;
    888 	}
    889 	/* pbuf and ebuf:  they will change values as double recovery decoding
    890 	 * goes on */
    891 	pbuf = ppda->bufPtr;
    892 	ebuf = epda->bufPtr;
    893 	/* find out the logical colume numbers in the encoding matrix of the
    894 	 * two failed columes */
    895 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    896 
    897 	/* find out the other failed colume not accessed this time */
    898 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    899 	for (i = 0; i < numDataCol; i++) {
    900 		npda.raidAddress = sosAddr + (i * secPerSU);
    901 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
    902 		/* skip over dead disks */
    903 		if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
    904 			if (i != fcol[0])
    905 				break;
    906 	}
    907 	RF_ASSERT(i < numDataCol);
    908 	fcol[1] = i;
    909 	/* assign temporary space to put recovered failed SU */
    910 	numbytes = fpda->numSector * bytesPerSector;
    911 	olddata[0] = RF_Malloc(numbytes);
    912 	olddata[1] = RF_Malloc(numbytes);
    913 	dest[0] = olddata[0];
    914 	dest[1] = olddata[1];
    915 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    916 	 * have already pointed at the beginning of each source buffers and
    917 	 * destination buffers */
    918 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    919 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    920 		for (j = 0; j < numDataCol; j++)
    921 			if ((j != fcol[0]) && (j != fcol[1]))
    922 				buf[j] += bytesPerSector;
    923 		dest[0] += bytesPerSector;
    924 		dest[1] += bytesPerSector;
    925 		ebuf += bytesPerSector;
    926 		pbuf += bytesPerSector;
    927 	}
    928 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    929 	 * data. With new writing data and this old data, use small write to
    930 	 * calculate the new redundant informations */
    931 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    932 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    933 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    934 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    935 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    936 	 * wudNodes; For current implementation, we assume the simplest case:
    937 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    938 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    939 	 * data to be writen to the failed disk. We first bxor the new data
    940 	 * into the old recovered data, then do the same things as small
    941 	 * write. */
    942 
    943 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes);
    944 	/* do new 'E' calculation  */
    945 	/* find out the corresponding colume in encoding matrix for write
    946 	 * colume to be encoded into redundant disk 'E' */
    947 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    948 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    949 	 * buffer pointer               */
    950 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    951 
    952 	/* do new 'P' calculation  */
    953 	rf_bxor(olddata[0], ppda->bufPtr, numbytes);
    954 	/* Free the allocated buffer  */
    955 	RF_Free(olddata[0], numbytes);
    956 	RF_Free(olddata[1], numbytes);
    957 	RF_Free(buf, numDataCol * sizeof(char *));
    958 
    959 	RF_ETIMER_STOP(timer);
    960 	RF_ETIMER_EVAL(timer);
    961 	if (tracerec) {
    962 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    963 	}
    964 	rf_GenericWakeupFunc(node, 0);
    965 	return (0);
    966 }
    967 #endif				/* RF_INCLUDE_EVENODD > 0 */
    968