Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.13
      1  1.13     oster /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.13 2003/12/29 02:38:17 oster Exp $	*/
      2   1.1     oster /*
      3   1.1     oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4   1.1     oster  * All rights reserved.
      5   1.1     oster  *
      6   1.1     oster  * Author: ChangMing Wu
      7   1.1     oster  *
      8   1.1     oster  * Permission to use, copy, modify and distribute this software and
      9   1.1     oster  * its documentation is hereby granted, provided that both the copyright
     10   1.1     oster  * notice and this permission notice appear in all copies of the
     11   1.1     oster  * software, derivative works or modified versions, and any portions
     12   1.1     oster  * thereof, and that both notices appear in supporting documentation.
     13   1.1     oster  *
     14   1.1     oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15   1.1     oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16   1.1     oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17   1.1     oster  *
     18   1.1     oster  * Carnegie Mellon requests users of this software to return to
     19   1.1     oster  *
     20   1.1     oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21   1.1     oster  *  School of Computer Science
     22   1.1     oster  *  Carnegie Mellon University
     23   1.1     oster  *  Pittsburgh PA 15213-3890
     24   1.1     oster  *
     25   1.1     oster  * any improvements or extensions that they make and grant Carnegie the
     26   1.1     oster  * rights to redistribute these changes.
     27   1.1     oster  */
     28   1.1     oster 
     29   1.1     oster /*
     30   1.1     oster  * Code for RAID-EVENODD  architecture.
     31   1.1     oster  */
     32  1.11     lukem 
     33  1.11     lukem #include <sys/cdefs.h>
     34  1.13     oster __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.13 2003/12/29 02:38:17 oster Exp $");
     35   1.1     oster 
     36   1.7     oster #include "rf_archs.h"
     37  1.12    martin #include "opt_raid_diagnostic.h"
     38   1.7     oster 
     39   1.7     oster #if RF_INCLUDE_EVENODD > 0
     40   1.7     oster 
     41  1.10     oster #include <dev/raidframe/raidframevar.h>
     42  1.10     oster 
     43   1.1     oster #include "rf_raid.h"
     44   1.1     oster #include "rf_dag.h"
     45   1.1     oster #include "rf_dagffrd.h"
     46   1.1     oster #include "rf_dagffwr.h"
     47   1.1     oster #include "rf_dagdegrd.h"
     48   1.1     oster #include "rf_dagdegwr.h"
     49   1.1     oster #include "rf_dagutils.h"
     50   1.1     oster #include "rf_dagfuncs.h"
     51   1.1     oster #include "rf_etimer.h"
     52   1.1     oster #include "rf_general.h"
     53   1.1     oster #include "rf_parityscan.h"
     54   1.1     oster #include "rf_evenodd.h"
     55   1.1     oster #include "rf_evenodd_dagfuncs.h"
     56   1.1     oster 
     57   1.1     oster /* These redundant functions are for small write */
     58   1.2     oster RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
     59   1.2     oster RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
     60   1.1     oster /* These redundant functions are for degraded read */
     61   1.2     oster RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     62   1.2     oster RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
/**********************************************************************************************
 *   The following encoding node function is used in EO_000_CreateLargeWriteDAG
 **********************************************************************************************/
     66   1.2     oster int
     67   1.2     oster rf_RegularPEFunc(node)
     68   1.2     oster 	RF_DagNode_t *node;
     69   1.1     oster {
     70   1.2     oster 	rf_RegularESubroutine(node, node->results[1]);
     71   1.2     oster 	rf_RegularXorFunc(node);/* does the wakeup here! */
     72   1.1     oster #if 1
     73   1.2     oster 	return (0);		/* XXX This was missing... GO */
     74   1.1     oster #endif
     75   1.1     oster }
     76   1.1     oster 
     77   1.1     oster 
/************************************************************************************************
 *  For EO_001_CreateSmallWriteDAG there are two functions to be used: (i) RegularONEFunc() and
 *  (ii) SimpleONEFunc(). The former is used when the write accesses at least as many sectors
 *  as a full stripe unit. The latter is used when the write accesses two stripe units but the
 *  total number of sectors is less than the sectors per SU. In that case the accessed parts of
 *  parity and 'E' appear as disconnected areas in their stripe units, and the parity write and
 *  the 'E' write are each divided into two distinct writes (four in total). The simple old-new
 *  write and the regular old-new write happen as in RAID-5.
 ************************************************************************************************/
     86   1.1     oster 
/* Algorithm:
     1. Store the difference of old data and new data in the Rod buffer.
     2. Then encode this buffer into the buffer which already holds the old 'E' information;
	the result can be shown to be the new 'E' information.
     3. Xor the Wnd buffer into the difference buffer to recover the original old data.
   There is an alternative: allocate a temporary buffer for storing the difference of
   old data and new data, then encode the temporary buffer into the old 'E' buffer to form the
   new 'E'. That approach runs at the same speed as the one above but needs more memory.
*/
     96   1.2     oster int
     97   1.2     oster rf_RegularONEFunc(node)
     98   1.2     oster 	RF_DagNode_t *node;
     99   1.2     oster {
    100   1.2     oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    101   1.2     oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    102   1.2     oster 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
    103   1.2     oster 								 * where you can find
    104   1.2     oster 								 * e-pda */
    105   1.2     oster 	int     i, k, retcode = 0;
    106   1.2     oster 	int     suoffset, length;
    107   1.2     oster 	RF_RowCol_t scol;
    108   1.2     oster 	char   *srcbuf, *destbuf;
    109   1.2     oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    110   1.2     oster 	RF_Etimer_t timer;
    111   1.9   thorpej 	RF_PhysDiskAddr_t *pda;
    112   1.9   thorpej #ifdef RAID_DIAGNOSTIC
    113   1.9   thorpej 	RF_PhysDiskAddr_t *EPDA =
    114   1.9   thorpej 	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    115   1.9   thorpej 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
    116   1.9   thorpej #endif /* RAID_DIAGNOSTIC */
    117   1.2     oster 
    118   1.2     oster 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
    119   1.2     oster 	RF_ASSERT(ESUOffset == 0);
    120   1.2     oster 
    121   1.2     oster 	RF_ETIMER_START(timer);
    122   1.2     oster 
    123   1.2     oster 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
    124   1.2     oster 	 * new data is stored in Rod buffer */
    125   1.2     oster 	for (k = 0; k < EpdaIndex; k += 2) {
    126   1.2     oster 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    127   1.2     oster 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    128   1.2     oster 	}
    129   1.2     oster 	/* Start to encoding the buffer storing the difference of old data and
    130   1.2     oster 	 * new data into 'E' buffer  */
    131   1.2     oster 	for (i = 0; i < EpdaIndex; i += 2)
    132   1.2     oster 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
    133   1.2     oster 									 * of E */
    134   1.2     oster 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    135   1.2     oster 			srcbuf = (char *) node->params[i + 1].p;
    136   1.2     oster 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
    137   1.2     oster 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    138   1.2     oster 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
    139   1.2     oster 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    140   1.2     oster 		}
    141   1.2     oster 	/* Recover the original old data to be used by parity encoding
    142   1.2     oster 	 * function in XorNode */
    143   1.2     oster 	for (k = 0; k < EpdaIndex; k += 2) {
    144   1.2     oster 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
    145   1.2     oster 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
    146   1.2     oster 	}
    147   1.2     oster 	RF_ETIMER_STOP(timer);
    148   1.2     oster 	RF_ETIMER_EVAL(timer);
    149   1.2     oster 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    150   1.2     oster 	rf_GenericWakeupFunc(node, 0);
    151   1.1     oster #if 1
    152   1.2     oster 	return (0);		/* XXX this was missing.. GO */
    153   1.1     oster #endif
    154   1.1     oster }
    155   1.1     oster 
    156   1.2     oster int
    157   1.2     oster rf_SimpleONEFunc(node)
    158   1.2     oster 	RF_DagNode_t *node;
    159   1.2     oster {
    160   1.2     oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    161   1.2     oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    162   1.2     oster 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    163   1.2     oster 	int     retcode = 0;
    164   1.2     oster 	char   *srcbuf, *destbuf;
    165   1.2     oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    166   1.2     oster 	int     length;
    167   1.2     oster 	RF_RowCol_t scol;
    168   1.2     oster 	RF_Etimer_t timer;
    169   1.2     oster 
    170   1.2     oster 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
    171   1.2     oster 	if (node->dagHdr->status == rf_enable) {
    172   1.2     oster 		RF_ETIMER_START(timer);
    173   1.2     oster 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
    174   1.2     oster 														 * writeDataNodes */
    175   1.2     oster 		/* bxor to buffer of readDataNodes */
    176   1.2     oster 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    177   1.2     oster 		/* find out the corresponding colume in encoding matrix for
    178   1.2     oster 		 * write colume to be encoded into redundant disk 'E' */
    179   1.2     oster 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    180   1.2     oster 		srcbuf = node->params[1].p;
    181   1.2     oster 		destbuf = node->params[3].p;
    182   1.2     oster 		/* Start encoding process */
    183   1.2     oster 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    184   1.2     oster 		rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    185   1.2     oster 		RF_ETIMER_STOP(timer);
    186   1.2     oster 		RF_ETIMER_EVAL(timer);
    187   1.2     oster 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    188   1.2     oster 
    189   1.2     oster 	}
    190   1.2     oster 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    191   1.2     oster 							 * explicitly since no
    192   1.2     oster 							 * I/O in this node */
    193   1.1     oster }
    194   1.1     oster 
    195   1.1     oster 
    196   1.1     oster /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    197   1.2     oster void
    198   1.2     oster rf_RegularESubroutine(node, ebuf)
    199   1.2     oster 	RF_DagNode_t *node;
    200   1.2     oster 	char   *ebuf;
    201   1.2     oster {
    202   1.2     oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    203   1.2     oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    204   1.2     oster 	RF_PhysDiskAddr_t *pda;
    205   1.2     oster 	int     i, suoffset;
    206   1.2     oster 	RF_RowCol_t scol;
    207   1.2     oster 	char   *srcbuf, *destbuf;
    208   1.2     oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    209   1.2     oster 	RF_Etimer_t timer;
    210   1.2     oster 
    211   1.2     oster 	RF_ETIMER_START(timer);
    212   1.2     oster 	for (i = 0; i < node->numParams - 2; i += 2) {
    213   1.2     oster 		RF_ASSERT(node->params[i + 1].p != ebuf);
    214   1.2     oster 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    215   1.2     oster 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    216   1.2     oster 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    217   1.2     oster 		srcbuf = (char *) node->params[i + 1].p;
    218   1.2     oster 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
    219   1.2     oster 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    220   1.2     oster 	}
    221   1.2     oster 	RF_ETIMER_STOP(timer);
    222   1.2     oster 	RF_ETIMER_EVAL(timer);
    223   1.2     oster 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    224   1.1     oster }
    225   1.1     oster 
    226   1.1     oster 
    227   1.1     oster /*******************************************************************************************
    228   1.2     oster  *			 Used in  EO_001_CreateLargeWriteDAG
    229   1.1     oster  ******************************************************************************************/
    230   1.2     oster int
    231   1.2     oster rf_RegularEFunc(node)
    232   1.2     oster 	RF_DagNode_t *node;
    233   1.1     oster {
    234   1.2     oster 	rf_RegularESubroutine(node, node->results[0]);
    235   1.2     oster 	rf_GenericWakeupFunc(node, 0);
    236   1.1     oster #if 1
    237   1.2     oster 	return (0);		/* XXX this was missing?.. GO */
    238   1.1     oster #endif
    239   1.1     oster }
/*******************************************************************************************
 * This degraded function allows only two cases:
 *  1. when the write accesses the full failed stripe unit; the access can then span more
 *     than one stripe unit.
 *  2. when the write accesses only part of the failed SU; we assume accesses of more than
 *     one stripe unit are not allowed, so that the write can be dealt with like a
 *     large write.
 *  The following function is based on these assumptions. So except in the second case,
 *  it looks the same as a large write encoding function. But this is not exactly the
 *  normal way of doing a degraded write, since raidframe has to break accesses
 *  other than the above two cases into smaller accesses. We may have to change
 *  DegrESubroutine in the future.
 *******************************************************************************************/
    253   1.2     oster void
    254   1.2     oster rf_DegrESubroutine(node, ebuf)
    255   1.2     oster 	RF_DagNode_t *node;
    256   1.2     oster 	char   *ebuf;
    257   1.2     oster {
    258   1.2     oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    259   1.2     oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    260   1.2     oster 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    261   1.2     oster 	RF_PhysDiskAddr_t *pda;
    262   1.2     oster 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    263   1.2     oster 	RF_RowCol_t scol;
    264   1.2     oster 	char   *srcbuf, *destbuf;
    265   1.2     oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    266   1.2     oster 	RF_Etimer_t timer;
    267   1.2     oster 
    268   1.2     oster 	RF_ETIMER_START(timer);
    269   1.2     oster 	for (i = 0; i < node->numParams - 2; i += 2) {
    270   1.2     oster 		RF_ASSERT(node->params[i + 1].p != ebuf);
    271   1.2     oster 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    272   1.2     oster 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    273   1.2     oster 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
    274   1.2     oster 		srcbuf = (char *) node->params[i + 1].p;
    275   1.2     oster 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    276   1.2     oster 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    277   1.2     oster 	}
    278   1.2     oster 
    279   1.2     oster 	RF_ETIMER_STOP(timer);
    280   1.2     oster 	RF_ETIMER_EVAL(timer);
    281   1.2     oster 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    282   1.1     oster }
    283   1.1     oster 
    284   1.1     oster 
    285   1.1     oster /**************************************************************************************
    286   1.2     oster  * This function is used in case where one data disk failed and both redundant disks
    287   1.1     oster  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    288   1.1     oster  * failed in the stripe but not accessed at this time, then we should, instead, use
    289   1.1     oster  * the rf_EOWriteDoubleRecoveryFunc().
    290   1.1     oster  **************************************************************************************/
    291   1.2     oster int
    292   1.2     oster rf_Degraded_100_EOFunc(node)
    293   1.2     oster 	RF_DagNode_t *node;
    294   1.1     oster {
    295   1.2     oster 	rf_DegrESubroutine(node, node->results[1]);
    296   1.2     oster 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
    297   1.1     oster #if 1
    298   1.2     oster 	return (0);		/* XXX this was missing... SHould these be
    299   1.2     oster 				 * void functions??? GO */
    300   1.1     oster #endif
    301   1.1     oster }
    302   1.1     oster /**************************************************************************************
    303   1.1     oster  * This function is to encode one sector in one of the data disks to the E disk.
    304   1.2     oster  * However, in evenodd this function can also be used as decoding function to recover
    305   1.1     oster  * data from dead disk in the case of parity failure and a single data failure.
    306   1.1     oster  **************************************************************************************/
    307   1.2     oster void
    308   1.2     oster rf_e_EncOneSect(
    309   1.2     oster     RF_RowCol_t srcLogicCol,
    310   1.2     oster     char *srcSecbuf,
    311   1.2     oster     RF_RowCol_t destLogicCol,
    312   1.2     oster     char *destSecbuf,
    313   1.2     oster     int bytesPerSector)
    314   1.1     oster {
    315   1.2     oster 	int     S_index;	/* index of the EU in the src col which need
    316   1.2     oster 				 * be Xored into all EUs in a dest sector */
    317   1.2     oster 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    318   1.2     oster 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
    319   1.2     oster 					 * the destination colume of encoding
    320   1.2     oster 					 * matrix */
    321   1.2     oster 	        indexInSrc;	/* row index of an encoding unit in the source
    322   1.2     oster 				 * colume used for recovery */
    323   1.2     oster 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    324   1.1     oster 
    325   1.1     oster #if RF_EO_MATRIX_DIM > 17
    326   1.2     oster 	int     shortsPerEU = bytesPerEU / sizeof(short);
    327   1.2     oster 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    328   1.6  augustss 	short temp1;
    329   1.1     oster #elif RF_EO_MATRIX_DIM == 17
    330   1.2     oster 	int     longsPerEU = bytesPerEU / sizeof(long);
    331   1.2     oster 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    332   1.6  augustss 	long temp1;
    333   1.1     oster #endif
    334   1.1     oster 
    335   1.1     oster #if RF_EO_MATRIX_DIM > 17
    336   1.2     oster 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
    337   1.2     oster 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    338   1.1     oster #elif RF_EO_MATRIX_DIM == 17
    339   1.2     oster 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
    340   1.2     oster 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    341   1.1     oster #endif
    342   1.1     oster 
    343   1.2     oster 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    344   1.1     oster #if RF_EO_MATRIX_DIM > 17
    345   1.2     oster 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
    346   1.1     oster #elif RF_EO_MATRIX_DIM == 17
    347   1.2     oster 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
    348   1.1     oster #endif
    349   1.1     oster 
    350   1.2     oster 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
    351   1.2     oster 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    352   1.1     oster 
    353   1.1     oster #if RF_EO_MATRIX_DIM > 17
    354   1.2     oster 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
    355   1.2     oster 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
    356   1.2     oster 		for (j = 0; j < shortsPerEU; j++) {
    357   1.2     oster 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
    358   1.2     oster 			/* note: S_index won't be at the end row for any src
    359   1.2     oster 			 * col! */
    360   1.2     oster 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    361   1.2     oster 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
    362   1.2     oster 			/* if indexInSrc is at the end row, ie.
    363   1.2     oster 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
    364   1.2     oster 			else
    365   1.2     oster 				destShortBuf[j] = temp1;
    366   1.2     oster 		}
    367   1.1     oster 
    368   1.1     oster #elif RF_EO_MATRIX_DIM == 17
    369   1.2     oster 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
    370   1.2     oster 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
    371   1.2     oster 		for (j = 0; j < longsPerEU; j++) {
    372   1.2     oster 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
    373   1.2     oster 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
    374   1.2     oster 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
    375   1.2     oster 			else
    376   1.2     oster 				destLongBuf[j] = temp1;
    377   1.2     oster 		}
    378   1.1     oster #endif
    379   1.2     oster 	}
    380   1.1     oster }
    381   1.1     oster 
    382   1.2     oster void
    383   1.2     oster rf_e_encToBuf(
    384   1.2     oster     RF_Raid_t * raidPtr,
    385   1.2     oster     RF_RowCol_t srcLogicCol,
    386   1.2     oster     char *srcbuf,
    387   1.2     oster     RF_RowCol_t destLogicCol,
    388   1.2     oster     char *destbuf,
    389   1.2     oster     int numSector)
    390   1.1     oster {
    391   1.2     oster 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    392   1.1     oster 
    393   1.2     oster 	for (i = 0; i < numSector; i++) {
    394   1.2     oster 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    395   1.2     oster 		srcbuf += bytesPerSector;
    396   1.2     oster 		destbuf += bytesPerSector;
    397   1.2     oster 	}
    398   1.1     oster }
/**************************************************************************************
 * When parity dies and one data disk dies, we use the second redundant information, 'E',
 * to recover the data on the dead disk. This function is used in the recovery node
 * for EO_110_CreateReadDAG.
 **************************************************************************************/
    404   1.2     oster int
    405   1.2     oster rf_RecoveryEFunc(node)
    406   1.2     oster 	RF_DagNode_t *node;
    407   1.2     oster {
    408   1.2     oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    409   1.2     oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    410   1.2     oster 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    411   1.2     oster 	RF_RowCol_t scol,	/* source logical column */
    412   1.2     oster 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
    413   1.2     oster 									 * failed SU */
    414   1.2     oster 	int     i;
    415   1.2     oster 	RF_PhysDiskAddr_t *pda;
    416   1.2     oster 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    417   1.2     oster 	char   *srcbuf, *destbuf;
    418   1.2     oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    419   1.2     oster 	RF_Etimer_t timer;
    420   1.2     oster 
    421   1.8   thorpej 	memset((char *) node->results[0], 0,
    422   1.8   thorpej 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    423   1.2     oster 	if (node->dagHdr->status == rf_enable) {
    424   1.2     oster 		RF_ETIMER_START(timer);
    425   1.2     oster 		for (i = 0; i < node->numParams - 2; i += 2)
    426   1.2     oster 			if (node->params[i + 1].p != node->results[0]) {
    427   1.2     oster 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    428   1.2     oster 				if (i == node->numParams - 4)
    429   1.2     oster 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
    430   1.2     oster 									 * redundant E */
    431   1.2     oster 				else
    432   1.2     oster 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
    433   1.2     oster 				srcbuf = (char *) node->params[i + 1].p;
    434   1.2     oster 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    435   1.2     oster 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    436   1.2     oster 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
    437   1.2     oster 			}
    438   1.2     oster 		RF_ETIMER_STOP(timer);
    439   1.2     oster 		RF_ETIMER_EVAL(timer);
    440   1.2     oster 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    441   1.2     oster 	}
    442   1.2     oster 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
    443   1.1     oster }
/**************************************************************************************
 * This function is used in the case where one data disk and the parity have failed.
 * (in EO_110_CreateWriteDAG )
 **************************************************************************************/
    448   1.2     oster int
    449   1.2     oster rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
    450   1.1     oster {
    451   1.2     oster 	rf_DegrESubroutine(node, node->results[0]);
    452   1.2     oster 	rf_GenericWakeupFunc(node, 0);
    453   1.1     oster #if 1
    454   1.2     oster 	return (0);		/* XXX Yet another one!! GO */
    455   1.1     oster #endif
    456   1.1     oster }
    457   1.1     oster 
    458   1.1     oster 
    459   1.2     oster 
    460   1.1     oster /**************************************************************************************
    461   1.1     oster  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    462   1.1     oster  **************************************************************************************/
    463   1.1     oster 
    464   1.2     oster void
    465   1.2     oster rf_doubleEOdecode(
    466   1.2     oster     RF_Raid_t * raidPtr,
    467   1.2     oster     char **rrdbuf,
    468   1.2     oster     char **dest,
    469   1.2     oster     RF_RowCol_t * fcol,
    470   1.2     oster     char *pbuf,
    471   1.2     oster     char *ebuf)
    472   1.2     oster {
    473   1.2     oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    474   1.2     oster 	int     i, j, k, f1, f2, row;
    475   1.2     oster 	int     rrdrow, erow, count = 0;
    476   1.2     oster 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    477   1.2     oster 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
    478   1.1     oster #if 0
    479   1.2     oster 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
    480   1.1     oster #endif
    481   1.2     oster 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
    482   1.2     oster 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
    483   1.2     oster 	int     numDataCol = layoutPtr->numDataCol;
    484   1.2     oster #if RF_EO_MATRIX_DIM > 17
    485   1.2     oster 	int     shortsPerEU = bytesPerEU / sizeof(short);
    486   1.2     oster 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
    487   1.2     oster 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    488   1.6  augustss 	short *temp;
    489   1.2     oster 	short  *P;
    490   1.2     oster 
    491   1.2     oster 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
    492   1.2     oster 	RF_Malloc(P, bytesPerEU, (short *));
    493   1.2     oster 	RF_Malloc(temp, bytesPerEU, (short *));
    494   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    495   1.2     oster 	int     longsPerEU = bytesPerEU / sizeof(long);
    496   1.2     oster 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
    497   1.2     oster 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    498   1.6  augustss 	long *temp;
    499   1.2     oster 	long   *P;
    500   1.2     oster 
    501   1.2     oster 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
    502   1.2     oster 	RF_Malloc(P, bytesPerEU, (long *));
    503   1.2     oster 	RF_Malloc(temp, bytesPerEU, (long *));
    504   1.2     oster #endif
    505   1.2     oster 	RF_ASSERT(*((long *) dest[0]) == 0);
    506   1.2     oster 	RF_ASSERT(*((long *) dest[1]) == 0);
    507   1.8   thorpej 	memset((char *) P, 0, bytesPerEU);
    508   1.8   thorpej 	memset((char *) temp, 0, bytesPerEU);
    509   1.2     oster 	RF_ASSERT(*P == 0);
    510   1.2     oster 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
    511   1.2     oster 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
    512   1.2     oster 	 * the Ref. paper by Blaum, et al 1993  */
    513   1.2     oster 	for (i = 0; i < numRowInEncMatix; i++)
    514   1.2     oster 		for (k = 0; k < longsPerEU; k++) {
    515   1.2     oster #if RF_EO_MATRIX_DIM > 17
    516   1.2     oster 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
    517   1.2     oster 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
    518   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    519   1.2     oster 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
    520   1.2     oster 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
    521   1.2     oster #endif
    522   1.2     oster 			P[k] ^= *ebuf_current;
    523   1.2     oster 			P[k] ^= *pbuf_current;
    524   1.2     oster 		}
    525   1.2     oster 	RF_ASSERT(fcol[0] != fcol[1]);
    526   1.2     oster 	if (fcol[0] < fcol[1]) {
    527   1.2     oster #if RF_EO_MATRIX_DIM > 17
    528   1.2     oster 		dest_smaller = (short *) (dest[0]);
    529   1.2     oster 		dest_larger = (short *) (dest[1]);
    530   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    531   1.2     oster 		dest_smaller = (long *) (dest[0]);
    532   1.2     oster 		dest_larger = (long *) (dest[1]);
    533   1.2     oster #endif
    534   1.2     oster 		f1 = fcol[0];
    535   1.2     oster 		f2 = fcol[1];
    536   1.2     oster 	} else {
    537   1.2     oster #if RF_EO_MATRIX_DIM > 17
    538   1.2     oster 		dest_smaller = (short *) (dest[1]);
    539   1.2     oster 		dest_larger = (short *) (dest[0]);
    540   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    541   1.2     oster 		dest_smaller = (long *) (dest[1]);
    542   1.2     oster 		dest_larger = (long *) (dest[0]);
    543   1.2     oster #endif
    544   1.2     oster 		f1 = fcol[1];
    545   1.2     oster 		f2 = fcol[0];
    546   1.2     oster 	}
    547   1.2     oster 	row = (RF_EO_MATRIX_DIM) - 1;
    548   1.2     oster 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
    549   1.2     oster #if RF_EO_MATRIX_DIM > 17
    550   1.2     oster 		dest_larger_current = dest_larger + row * shortsPerEU;
    551   1.2     oster 		dest_smaller_current = dest_smaller + row * shortsPerEU;
    552   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    553   1.2     oster 		dest_larger_current = dest_larger + row * longsPerEU;
    554   1.2     oster 		dest_smaller_current = dest_smaller + row * longsPerEU;
    555   1.2     oster #endif
    556   1.2     oster 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    557   1.2     oster 		       which is the failed data in the colume which has smaller col index. **/
    558   1.2     oster 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    559   1.2     oster 		for (j = 0; j < numDataCol; j++) {
    560   1.2     oster 			if (j == f1 || j == f2)
    561   1.2     oster 				continue;
    562   1.2     oster 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
    563   1.2     oster 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
    564   1.2     oster #if RF_EO_MATRIX_DIM > 17
    565   1.2     oster 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
    566   1.2     oster 				for (k = 0; k < shortsPerEU; k++)
    567   1.2     oster 					temp[k] ^= *(rrdbuf_current + k);
    568   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    569   1.2     oster 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
    570   1.2     oster 				for (k = 0; k < longsPerEU; k++)
    571   1.2     oster 					temp[k] ^= *(rrdbuf_current + k);
    572   1.2     oster #endif
    573   1.2     oster 			}
    574   1.2     oster 		}
    575   1.2     oster 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
    576   1.2     oster 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
    577   1.2     oster 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
    578   1.2     oster 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
    579   1.2     oster 		 * diagonal) ^ (failed 2)       */
    580   1.2     oster 
    581   1.2     oster 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
    582   1.2     oster 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
    583   1.2     oster #if RF_EO_MATRIX_DIM > 17
    584   1.2     oster 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
    585   1.2     oster 			for (k = 0; k < shortsPerEU; k++)
    586   1.2     oster 				temp[k] ^= *(ebuf_current + k);
    587   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    588   1.2     oster 			ebuf_current = (long *) ebuf + longsPerEU * erow;
    589   1.2     oster 			for (k = 0; k < longsPerEU; k++)
    590   1.2     oster 				temp[k] ^= *(ebuf_current + k);
    591   1.2     oster #endif
    592   1.2     oster 		}
    593   1.2     oster 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
    594   1.2     oster 		 * proved to be actually  (principle diagonal)  After this
    595   1.2     oster 		 * step, temp[k] = (failed 2), the failed data to be recovered */
    596   1.2     oster #if RF_EO_MATRIX_DIM > 17
    597   1.2     oster 		for (k = 0; k < shortsPerEU; k++)
    598   1.2     oster 			temp[k] ^= P[k];
    599   1.2     oster 		/* Put the data to the destination buffer                              */
    600   1.2     oster 		for (k = 0; k < shortsPerEU; k++)
    601   1.2     oster 			dest_larger_current[k] = temp[k];
    602   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    603   1.2     oster 		for (k = 0; k < longsPerEU; k++)
    604   1.2     oster 			temp[k] ^= P[k];
    605   1.2     oster 		/* Put the data to the destination buffer                              */
    606   1.2     oster 		for (k = 0; k < longsPerEU; k++)
    607   1.2     oster 			dest_larger_current[k] = temp[k];
    608   1.2     oster #endif
    609   1.2     oster 
    610   1.2     oster 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    611   1.2     oster 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
    612   1.2     oster 		 * columes    */
    613   1.2     oster 		for (j = 0; j < numDataCol; j++) {
    614   1.2     oster 			if (j == f1 || j == f2)
    615   1.2     oster 				continue;
    616   1.2     oster #if RF_EO_MATRIX_DIM > 17
    617   1.2     oster 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
    618   1.2     oster 			for (k = 0; k < shortsPerEU; k++)
    619   1.2     oster 				temp[k] ^= *(rrdbuf_current + k);
    620   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    621   1.2     oster 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
    622   1.2     oster 			for (k = 0; k < longsPerEU; k++)
    623   1.2     oster 				temp[k] ^= *(rrdbuf_current + k);
    624   1.2     oster #endif
    625   1.2     oster 		}
    626   1.2     oster 		/* step 2: ^A(row,m-1) */
    627   1.2     oster 		/* step 3: Put the data to the destination buffer                             	 */
    628   1.2     oster #if RF_EO_MATRIX_DIM > 17
    629   1.2     oster 		pbuf_current = (short *) pbuf + shortsPerEU * row;
    630   1.2     oster 		for (k = 0; k < shortsPerEU; k++)
    631   1.2     oster 			temp[k] ^= *(pbuf_current + k);
    632   1.2     oster 		for (k = 0; k < shortsPerEU; k++)
    633   1.2     oster 			dest_smaller_current[k] = temp[k];
    634   1.2     oster #elif RF_EO_MATRIX_DIM == 17
    635   1.2     oster 		pbuf_current = (long *) pbuf + longsPerEU * row;
    636   1.2     oster 		for (k = 0; k < longsPerEU; k++)
    637   1.2     oster 			temp[k] ^= *(pbuf_current + k);
    638   1.2     oster 		for (k = 0; k < longsPerEU; k++)
    639   1.2     oster 			dest_smaller_current[k] = temp[k];
    640   1.2     oster #endif
    641   1.2     oster 		count++;
    642   1.2     oster 	}
    643   1.2     oster 	/* Check if all Encoding Unit in the data buffer have been decoded,
    644   1.2     oster 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    645   1.2     oster 	 * this algorithm will covered all buffer 				 */
    646   1.2     oster 	RF_ASSERT(count == numRowInEncMatix);
    647   1.2     oster 	RF_Free((char *) P, bytesPerEU);
    648   1.2     oster 	RF_Free((char *) temp, bytesPerEU);
    649   1.1     oster }
    650   1.2     oster 
    651   1.1     oster 
    652   1.1     oster /***************************************************************************************
    653   1.1     oster * 	This function is called by double degragded read
    654   1.2     oster * 	EO_200_CreateReadDAG
    655   1.1     oster *
    656   1.1     oster ***************************************************************************************/
    657   1.2     oster int
    658   1.2     oster rf_EvenOddDoubleRecoveryFunc(node)
    659   1.2     oster 	RF_DagNode_t *node;
    660   1.2     oster {
    661   1.2     oster 	int     ndataParam = 0;
    662   1.2     oster 	int     np = node->numParams;
    663   1.2     oster 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    664   1.2     oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    665   1.2     oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    666   1.2     oster 	int     i, prm, sector, nresults = node->numResults;
    667   1.2     oster 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    668   1.2     oster 	unsigned sosAddr;
    669   1.2     oster 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
    670   1.2     oster 							 * memory is allocated */
    671   1.2     oster 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    672   1.2     oster 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
    673   1.2     oster 	        npda;
    674   1.2     oster 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    675   1.2     oster 	char  **buf, *ebuf, *pbuf, *dest[2];
    676   1.2     oster 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
    677   1.2     oster 	RF_SectorNum_t startSector, endSector;
    678   1.2     oster 	RF_Etimer_t timer;
    679   1.2     oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    680   1.2     oster 
    681   1.2     oster 	RF_ETIMER_START(timer);
    682   1.2     oster 
    683   1.2     oster 	/* Find out the number of parameters which are pdas for data
    684   1.2     oster 	 * information */
    685   1.2     oster 	for (i = 0; i <= np; i++)
    686   1.2     oster 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
    687   1.2     oster 			ndataParam = i;
    688   1.2     oster 			break;
    689   1.2     oster 		}
    690   1.2     oster 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    691   1.2     oster 	if (ndataParam != 0) {
    692   1.2     oster 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
    693   1.2     oster 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
    694   1.2     oster 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
    695   1.2     oster 	}
    696   1.2     oster 	if (asmap->failedPDAs[1] &&
    697   1.2     oster 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    698   1.2     oster 		RF_ASSERT(0);	/* currently, no support for this situation */
    699   1.2     oster 		ppda = node->params[np - 6].p;
    700   1.2     oster 		ppda2 = node->params[np - 5].p;
    701   1.2     oster 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
    702   1.2     oster 		epda = node->params[np - 4].p;
    703   1.2     oster 		epda2 = node->params[np - 3].p;
    704   1.2     oster 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
    705   1.2     oster 		two = 1;
    706   1.2     oster 	} else {
    707   1.2     oster 		ppda = node->params[np - 4].p;
    708   1.2     oster 		epda = node->params[np - 3].p;
    709   1.2     oster 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    710   1.2     oster 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    711   1.2     oster 		RF_ASSERT(psuoff == esuoff);
    712   1.2     oster 	}
    713   1.2     oster 	/*
    714   1.2     oster             the followings have three goals:
    715   1.2     oster             1. determine the startSector to begin decoding and endSector to end decoding.
    716   1.2     oster             2. determine the colume numbers of the two failed disks.
    717   1.2     oster             3. determine the offset and end offset of the access within each failed stripe unit.
    718   1.2     oster          */
    719   1.2     oster 	if (nresults == 1) {
    720   1.2     oster 		/* find the startSector to begin decoding */
    721   1.2     oster 		pda = node->results[0];
    722   1.8   thorpej 		memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
    723   1.2     oster 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    724   1.2     oster 		fsuend[0] = fsuoff[0] + pda->numSector;
    725   1.2     oster 		startSector = fsuoff[0];
    726   1.2     oster 		endSector = fsuend[0];
    727   1.2     oster 
    728   1.5     soren 		/* find out the column of failed disk being accessed */
    729   1.2     oster 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
    730   1.2     oster 
    731   1.2     oster 		/* find out the other failed colume not accessed */
    732   1.2     oster 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    733   1.2     oster 		for (i = 0; i < numDataCol; i++) {
    734   1.2     oster 			npda.raidAddress = sosAddr + (i * secPerSU);
    735  1.13     oster 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
    736   1.2     oster 			/* skip over dead disks */
    737  1.13     oster 			if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
    738   1.2     oster 				if (i != fcol[0])
    739   1.2     oster 					break;
    740   1.2     oster 		}
    741   1.2     oster 		RF_ASSERT(i < numDataCol);
    742   1.2     oster 		fcol[1] = i;
    743   1.2     oster 	} else {
    744   1.2     oster 		RF_ASSERT(nresults == 2);
    745   1.2     oster 		pda0 = node->results[0];
    746   1.8   thorpej 		memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
    747   1.2     oster 		pda1 = node->results[1];
    748   1.8   thorpej 		memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
    749   1.2     oster 		/* determine the failed colume numbers of the two failed
    750   1.2     oster 		 * disks. */
    751   1.2     oster 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
    752   1.2     oster 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
    753   1.2     oster 		/* determine the offset and end offset of the access within
    754   1.2     oster 		 * each failed stripe unit. */
    755   1.2     oster 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
    756   1.2     oster 		fsuend[0] = fsuoff[0] + pda0->numSector;
    757   1.2     oster 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
    758   1.2     oster 		fsuend[1] = fsuoff[1] + pda1->numSector;
    759   1.2     oster 		/* determine the startSector to begin decoding */
    760   1.2     oster 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
    761   1.2     oster 		/* determine the endSector to end decoding */
    762   1.2     oster 		endSector = RF_MAX(fsuend[0], fsuend[1]);
    763   1.2     oster 	}
    764   1.2     oster 	/*
    765   1.2     oster 	      assign the beginning sector and the end sector for each parameter
    766   1.2     oster 	      find out the corresponding colume # for each parameter
    767   1.2     oster         */
    768   1.2     oster 	for (prm = 0; prm < ndataParam; prm++) {
    769   1.2     oster 		pda = node->params[prm].p;
    770   1.2     oster 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    771   1.2     oster 		suend[prm] = suoff[prm] + pda->numSector;
    772   1.2     oster 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
    773   1.2     oster 	}
    774   1.2     oster 	/* 'sector' is the sector for the current decoding algorithm. For each
    775   1.2     oster 	 * sector in the failed SU, find out the corresponding parameters that
    776   1.2     oster 	 * cover the current sector and that are needed for decoding of this
    777   1.2     oster 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
    778   1.2     oster 	 * accessed failed SU. If not, malloc a temporary space of a sector in
    779   1.2     oster 	 * size. */
    780   1.2     oster 	for (sector = startSector; sector < endSector; sector++) {
    781   1.2     oster 		if (nresults == 2)
    782   1.2     oster 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
    783   1.2     oster 				continue;
    784   1.2     oster 		for (prm = 0; prm < ndataParam; prm++)
    785   1.2     oster 			if (suoff[prm] <= sector && sector < suend[prm])
    786   1.2     oster 				buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
    787   1.2     oster 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
    788   1.2     oster 		/* find out if sector is in the shadow of any accessed failed
    789   1.2     oster 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
    790   1.2     oster 		 * position of the buffer corresponding to failed SUs. if no,
    791   1.2     oster 		 * malloc a temporary space of a sector in size for
    792   1.2     oster 		 * destination of decoding. */
    793   1.2     oster 		RF_ASSERT(nresults == 1 || nresults == 2);
    794   1.2     oster 		if (nresults == 1) {
    795   1.2     oster 			dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    796   1.2     oster 			/* Always malloc temp buffer to dest[1]  */
    797   1.2     oster 			RF_Malloc(dest[1], bytesPerSector, (char *));
    798   1.8   thorpej 			memset(dest[1], 0, bytesPerSector);
    799   1.2     oster 			mallc_two = 1;
    800   1.2     oster 		} else {
    801   1.2     oster 			if (fsuoff[0] <= sector && sector < fsuend[0])
    802   1.2     oster 				dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
    803   1.2     oster 			else {
    804   1.2     oster 				RF_Malloc(dest[0], bytesPerSector, (char *));
    805   1.8   thorpej 				memset(dest[0], 0, bytesPerSector);
    806   1.2     oster 				mallc_one = 1;
    807   1.2     oster 			}
    808   1.2     oster 			if (fsuoff[1] <= sector && sector < fsuend[1])
    809   1.2     oster 				dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
    810   1.2     oster 			else {
    811   1.2     oster 				RF_Malloc(dest[1], bytesPerSector, (char *));
    812   1.8   thorpej 				memset(dest[1], 0, bytesPerSector);
    813   1.2     oster 				mallc_two = 1;
    814   1.2     oster 			}
    815   1.2     oster 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
    816   1.2     oster 		}
    817   1.2     oster 		pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
    818   1.2     oster 		ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
    819   1.2     oster 		/*
    820   1.2     oster 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
    821   1.2     oster 	         * one sector to destination.
    822   1.2     oster 	         */
    823   1.2     oster 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    824   1.2     oster 		/* free all allocated memory, and mark flag to indicate no
    825   1.2     oster 		 * memory is being allocated */
    826   1.2     oster 		if (mallc_one == 1)
    827   1.2     oster 			RF_Free(dest[0], bytesPerSector);
    828   1.2     oster 		if (mallc_two == 1)
    829   1.2     oster 			RF_Free(dest[1], bytesPerSector);
    830   1.2     oster 		mallc_one = mallc_two = 0;
    831   1.2     oster 	}
    832   1.2     oster 	RF_Free(buf, numDataCol * sizeof(char *));
    833   1.2     oster 	if (ndataParam != 0) {
    834   1.2     oster 		RF_Free(suoff, ndataParam * sizeof(long));
    835   1.2     oster 		RF_Free(suend, ndataParam * sizeof(long));
    836   1.2     oster 		RF_Free(prmToCol, ndataParam * sizeof(long));
    837   1.2     oster 	}
    838   1.2     oster 	RF_ETIMER_STOP(timer);
    839   1.2     oster 	RF_ETIMER_EVAL(timer);
    840   1.2     oster 	if (tracerec) {
    841   1.2     oster 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    842   1.2     oster 	}
    843   1.2     oster 	rf_GenericWakeupFunc(node, 0);
    844   1.1     oster #if 1
    845   1.2     oster 	return (0);		/* XXX is this even close!!?!?!!? GO */
    846   1.1     oster #endif
    847   1.1     oster }
    848   1.1     oster 
    849   1.1     oster 
    850   1.2     oster /* currently, only access of one of the two failed SU is allowed in this function.
    851   1.2     oster  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    852   1.1     oster  * many accesses of single stripe unit.
    853   1.1     oster  */
    854   1.1     oster 
    855   1.2     oster int
    856   1.2     oster rf_EOWriteDoubleRecoveryFunc(node)
    857   1.2     oster 	RF_DagNode_t *node;
    858   1.2     oster {
    859   1.2     oster 	int     np = node->numParams;
    860   1.2     oster 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    861   1.2     oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    862   1.2     oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    863   1.2     oster 	RF_SectorNum_t sector;
    864   1.2     oster 	RF_RowCol_t col, scol;
    865   1.2     oster 	int     prm, i, j;
    866   1.2     oster 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    867   1.2     oster 	unsigned sosAddr;
    868   1.2     oster 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    869   1.2     oster 	RF_int64 numbytes;
    870   1.2     oster 	RF_SectorNum_t startSector, endSector;
    871   1.2     oster 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
    872   1.2     oster 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    873   1.2     oster 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
    874   1.2     oster 				 * buffer storing data read from col0, col1,
    875   1.2     oster 				 * col2 */
    876   1.2     oster 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
    877   1.2     oster 	RF_Etimer_t timer;
    878   1.2     oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    879   1.2     oster 
    880   1.2     oster 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
    881   1.2     oster 						 * case, the other failed SU
    882   1.2     oster 						 * is not being accessed */
    883   1.2     oster 	RF_ETIMER_START(timer);
    884   1.2     oster 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
    885   1.2     oster 
    886   1.2     oster 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
    887   1.2     oster 				 * and [1] are Ppda and Epda  */
    888   1.2     oster 	epda = node->results[1];
    889   1.2     oster 	fpda = asmap->failedPDAs[0];
    890   1.2     oster 
    891   1.2     oster 	/* First, recovery the failed old SU using EvenOdd double decoding      */
    892   1.2     oster 	/* determine the startSector and endSector for decoding */
    893   1.2     oster 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
    894   1.2     oster 	endSector = startSector + fpda->numSector;
    895   1.2     oster 	/* Assign buf[col] pointers to point to each non-failed colume  and
    896   1.2     oster 	 * initialize the pbuf and ebuf to point at the beginning of each
    897   1.2     oster 	 * source buffers and destination buffers */
    898   1.2     oster 	for (prm = 0; prm < numDataCol - 2; prm++) {
    899   1.2     oster 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
    900   1.2     oster 		col = rf_EUCol(layoutPtr, pda->raidAddress);
    901   1.2     oster 		buf[col] = pda->bufPtr;
    902   1.2     oster 	}
    903   1.2     oster 	/* pbuf and ebuf:  they will change values as double recovery decoding
    904   1.2     oster 	 * goes on */
    905   1.2     oster 	pbuf = ppda->bufPtr;
    906   1.2     oster 	ebuf = epda->bufPtr;
    907   1.2     oster 	/* find out the logical colume numbers in the encoding matrix of the
    908   1.2     oster 	 * two failed columes */
    909   1.2     oster 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
    910   1.2     oster 
    911   1.2     oster 	/* find out the other failed colume not accessed this time */
    912   1.2     oster 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    913   1.2     oster 	for (i = 0; i < numDataCol; i++) {
    914   1.2     oster 		npda.raidAddress = sosAddr + (i * secPerSU);
    915  1.13     oster 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
    916   1.2     oster 		/* skip over dead disks */
    917  1.13     oster 		if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
    918   1.2     oster 			if (i != fcol[0])
    919   1.2     oster 				break;
    920   1.2     oster 	}
    921   1.2     oster 	RF_ASSERT(i < numDataCol);
    922   1.2     oster 	fcol[1] = i;
    923   1.2     oster 	/* assign temporary space to put recovered failed SU */
    924   1.2     oster 	numbytes = fpda->numSector * bytesPerSector;
    925   1.2     oster 	RF_Malloc(olddata[0], numbytes, (char *));
    926   1.2     oster 	RF_Malloc(olddata[1], numbytes, (char *));
    927   1.2     oster 	dest[0] = olddata[0];
    928   1.2     oster 	dest[1] = olddata[1];
    929   1.8   thorpej 	memset(olddata[0], 0, numbytes);
    930   1.8   thorpej 	memset(olddata[1], 0, numbytes);
    931   1.2     oster 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
    932   1.2     oster 	 * have already pointed at the beginning of each source buffers and
    933   1.2     oster 	 * destination buffers */
    934   1.2     oster 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
    935   1.2     oster 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
    936   1.2     oster 		for (j = 0; j < numDataCol; j++)
    937   1.2     oster 			if ((j != fcol[0]) && (j != fcol[1]))
    938   1.2     oster 				buf[j] += bytesPerSector;
    939   1.2     oster 		dest[0] += bytesPerSector;
    940   1.2     oster 		dest[1] += bytesPerSector;
    941   1.2     oster 		ebuf += bytesPerSector;
    942   1.2     oster 		pbuf += bytesPerSector;
    943   1.2     oster 	}
    944   1.2     oster 	/* after recovery, the buffer pointed by olddata[0] is the old failed
    945   1.2     oster 	 * data. With new writing data and this old data, use small write to
    946   1.2     oster 	 * calculate the new redundant informations */
    947   1.2     oster 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
    948   1.2     oster 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
    949   1.2     oster 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
    950   1.2     oster 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
    951   1.2     oster 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
    952   1.2     oster 	 * wudNodes; For current implementation, we assume the simplest case:
    953   1.2     oster 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
    954   1.2     oster 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
    955   1.2     oster 	 * data to be writen to the failed disk. We first bxor the new data
    956   1.2     oster 	 * into the old recovered data, then do the same things as small
    957   1.2     oster 	 * write. */
    958   1.2     oster 
    959   1.2     oster 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
    960   1.2     oster 	/* do new 'E' calculation  */
    961   1.2     oster 	/* find out the corresponding colume in encoding matrix for write
    962   1.2     oster 	 * colume to be encoded into redundant disk 'E' */
    963   1.2     oster 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
    964   1.2     oster 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
    965   1.2     oster 	 * buffer pointer               */
    966   1.2     oster 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    967   1.2     oster 
    968   1.2     oster 	/* do new 'P' calculation  */
    969   1.2     oster 	rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
    970   1.2     oster 	/* Free the allocated buffer  */
    971   1.2     oster 	RF_Free(olddata[0], numbytes);
    972   1.2     oster 	RF_Free(olddata[1], numbytes);
    973   1.2     oster 	RF_Free(buf, numDataCol * sizeof(char *));
    974   1.2     oster 
    975   1.2     oster 	RF_ETIMER_STOP(timer);
    976   1.2     oster 	RF_ETIMER_EVAL(timer);
    977   1.2     oster 	if (tracerec) {
    978   1.2     oster 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    979   1.2     oster 	}
    980   1.2     oster 	rf_GenericWakeupFunc(node, 0);
    981   1.2     oster 	return (0);
    982   1.1     oster }
    983   1.7     oster #endif				/* RF_INCLUDE_EVENODD > 0 */
    984