Home | History | Annotate | Line # | Download | only in raidframe
rf_evenodd_dagfuncs.c revision 1.1
      1  1.1  oster /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.1 1998/11/13 04:20:29 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Author: ChangMing Wu
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster 
     29  1.1  oster /*
     30  1.1  oster  * Code for RAID-EVENODD  architecture.
     31  1.1  oster  */
     32  1.1  oster 
     33  1.1  oster #include "rf_types.h"
     34  1.1  oster #include "rf_raid.h"
     35  1.1  oster #include "rf_dag.h"
     36  1.1  oster #include "rf_dagffrd.h"
     37  1.1  oster #include "rf_dagffwr.h"
     38  1.1  oster #include "rf_dagdegrd.h"
     39  1.1  oster #include "rf_dagdegwr.h"
     40  1.1  oster #include "rf_dagutils.h"
     41  1.1  oster #include "rf_dagfuncs.h"
     42  1.1  oster #include "rf_threadid.h"
     43  1.1  oster #include "rf_etimer.h"
     44  1.1  oster #include "rf_general.h"
     45  1.1  oster #include "rf_configure.h"
     46  1.1  oster #include "rf_parityscan.h"
     47  1.1  oster #include "rf_sys.h"
     48  1.1  oster #include "rf_evenodd.h"
     49  1.1  oster #include "rf_evenodd_dagfuncs.h"
     50  1.1  oster 
     51  1.1  oster /* These redundant functions are for small write */
     52  1.1  oster RF_RedFuncs_t rf_EOSmallWritePFuncs = { rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P" };
     53  1.1  oster RF_RedFuncs_t rf_EOSmallWriteEFuncs = { rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E" };
     54  1.1  oster 
     55  1.1  oster /* These redundant functions are for degraded read */
     56  1.1  oster RF_RedFuncs_t rf_eoPRecoveryFuncs =  { rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
     57  1.1  oster RF_RedFuncs_t rf_eoERecoveryFuncs = { rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func" };
     58  1.1  oster 
     59  1.1  oster /**********************************************************************************************
     60  1.1  oster  *   the following encoding node functions is used in  EO_000_CreateLargeWriteDAG
     61  1.1  oster  **********************************************************************************************/
     62  1.1  oster int rf_RegularPEFunc(node)
     63  1.1  oster   RF_DagNode_t  *node;
     64  1.1  oster {
     65  1.1  oster    rf_RegularESubroutine(node,node->results[1]);
     66  1.1  oster    rf_RegularXorFunc(node);    /* does the wakeup here! */
     67  1.1  oster #if 1
     68  1.1  oster    return(0); /* XXX This was missing... GO */
     69  1.1  oster #endif
     70  1.1  oster }
     71  1.1  oster 
     72  1.1  oster 
     73  1.1  oster /************************************************************************************************
     74  1.1  oster  *  For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to
     75  1.1  oster  *  be used. The previous case is when write access at least sectors of full stripe unit.
     76  1.1  oster  *  The later function is used when the write access two stripe units but with total sectors
     77  1.1  oster  *  less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected
     78  1.1  oster  *  areas in their stripe unit and  parity write and 'E' write are both devided into two distinct
     79  1.1  oster  *  writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5
     80  1.1  oster  ************************************************************************************************/
     81  1.1  oster 
     82  1.1  oster /* Algorithm:
     83  1.1  oster      1. Store the difference of old data and new data in the Rod buffer.
     84  1.1  oster      2. then encode this buffer into the buffer which already have old 'E' information inside it,
     85  1.1  oster 	the result can be shown to be the new 'E' information.
     86  1.1  oster      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
     87  1.1  oster    Here we have another alternative: to allocate a temporary buffer for storing the difference of
     88  1.1  oster    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
     89  1.1  oster    take the same speed as the previous, and need more memory.
     90  1.1  oster */
     91  1.1  oster int rf_RegularONEFunc(node)
     92  1.1  oster   RF_DagNode_t  *node;
     93  1.1  oster {
     94  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
     95  1.1  oster   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
     96  1.1  oster   int EpdaIndex = (node->numParams-1)/2 - 1; /* the parameter of node where you can find e-pda */
     97  1.1  oster   int i, k, retcode = 0;
     98  1.1  oster   int suoffset, length;
     99  1.1  oster   RF_RowCol_t scol;
    100  1.1  oster   char *srcbuf, *destbuf;
    101  1.1  oster   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    102  1.1  oster   RF_Etimer_t timer;
    103  1.1  oster   RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
    104  1.1  oster   int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero  */
    105  1.1  oster 
    106  1.1  oster   RF_ASSERT( EPDA->type == RF_PDA_TYPE_Q );
    107  1.1  oster   RF_ASSERT(ESUOffset == 0);
    108  1.1  oster 
    109  1.1  oster   RF_ETIMER_START(timer);
    110  1.1  oster 
    111  1.1  oster   /* Xor the Wnd buffer into Rod buffer, the difference of old data and new data is stored in Rod buffer */
    112  1.1  oster   for( k=0; k< EpdaIndex; k += 2) {
    113  1.1  oster    length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
    114  1.1  oster    retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
    115  1.1  oster   }
    116  1.1  oster   /* Start to encoding the buffer storing the difference of old data and new data into 'E' buffer  */
    117  1.1  oster   for (i=0; i<EpdaIndex; i+=2) if (node->params[i+1].p != node->results[0]) { /* results[0] is buf ptr of E */
    118  1.1  oster     pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    119  1.1  oster     srcbuf = (char *) node->params[i+1].p;
    120  1.1  oster     scol = rf_EUCol(layoutPtr, pda->raidAddress );
    121  1.1  oster     suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    122  1.1  oster     destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset);
    123  1.1  oster     rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    124  1.1  oster   }
    125  1.1  oster   /* Recover the original old data to be used by parity encoding function in XorNode */
    126  1.1  oster   for( k=0; k< EpdaIndex; k += 2) {
    127  1.1  oster    length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
    128  1.1  oster    retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
    129  1.1  oster   }
    130  1.1  oster   RF_ETIMER_STOP(timer);
    131  1.1  oster   RF_ETIMER_EVAL(timer);
    132  1.1  oster   tracerec->q_us += RF_ETIMER_VAL_US(timer);
    133  1.1  oster   rf_GenericWakeupFunc(node, 0);
    134  1.1  oster #if 1
    135  1.1  oster   return(0); /* XXX this was missing.. GO */
    136  1.1  oster #endif
    137  1.1  oster }
    138  1.1  oster 
    139  1.1  oster int rf_SimpleONEFunc(node)
    140  1.1  oster   RF_DagNode_t   *node;
    141  1.1  oster {
    142  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
    143  1.1  oster   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
    144  1.1  oster   RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    145  1.1  oster   int retcode = 0;
    146  1.1  oster   char *srcbuf, *destbuf;
    147  1.1  oster   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    148  1.1  oster   int length;
    149  1.1  oster   RF_RowCol_t scol;
    150  1.1  oster   RF_Etimer_t timer;
    151  1.1  oster 
    152  1.1  oster   RF_ASSERT( ((RF_PhysDiskAddr_t *)node->params[2].p)->type == RF_PDA_TYPE_Q );
    153  1.1  oster   if (node->dagHdr->status == rf_enable) {
    154  1.1  oster      RF_ETIMER_START(timer);
    155  1.1  oster      length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[4].p)->numSector );/* this is a pda of writeDataNodes */
    156  1.1  oster      /* bxor to buffer of readDataNodes */
    157  1.1  oster      retcode = rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    158  1.1  oster      /* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
    159  1.1  oster      scol = rf_EUCol(layoutPtr, pda->raidAddress );
    160  1.1  oster      srcbuf = node->params[1].p;
    161  1.1  oster      destbuf = node->params[3].p;
    162  1.1  oster      /* Start encoding process */
    163  1.1  oster      rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    164  1.1  oster      rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
    165  1.1  oster      RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
    166  1.1  oster 
    167  1.1  oster   }
    168  1.1  oster   return(rf_GenericWakeupFunc(node, retcode));     /* call wake func explicitly since no I/O in this node */
    169  1.1  oster }
    170  1.1  oster 
    171  1.1  oster 
    172  1.1  oster /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
    173  1.1  oster void rf_RegularESubroutine(node, ebuf)
    174  1.1  oster   RF_DagNode_t  *node;
    175  1.1  oster   char          *ebuf;
    176  1.1  oster {
    177  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
    178  1.1  oster   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
    179  1.1  oster   RF_PhysDiskAddr_t *pda;
    180  1.1  oster   int i, suoffset;
    181  1.1  oster   RF_RowCol_t scol;
    182  1.1  oster   char *srcbuf, *destbuf;
    183  1.1  oster   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    184  1.1  oster   RF_Etimer_t timer;
    185  1.1  oster 
    186  1.1  oster   RF_ETIMER_START(timer);
    187  1.1  oster   for (i=0; i<node->numParams-2; i+=2) {
    188  1.1  oster     RF_ASSERT( node->params[i+1].p != ebuf );
    189  1.1  oster     pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    190  1.1  oster     suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    191  1.1  oster     scol = rf_EUCol(layoutPtr, pda->raidAddress );
    192  1.1  oster     srcbuf = (char *) node->params[i+1].p;
    193  1.1  oster     destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset );
    194  1.1  oster     rf_e_encToBuf(raidPtr,  scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    195  1.1  oster   }
    196  1.1  oster   RF_ETIMER_STOP(timer);
    197  1.1  oster   RF_ETIMER_EVAL(timer);
    198  1.1  oster   tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    199  1.1  oster }
    200  1.1  oster 
    201  1.1  oster 
    202  1.1  oster /*******************************************************************************************
    203  1.1  oster  *			 Used in  EO_001_CreateLargeWriteDAG
    204  1.1  oster  ******************************************************************************************/
    205  1.1  oster int rf_RegularEFunc(node)
    206  1.1  oster   RF_DagNode_t  *node;
    207  1.1  oster {
    208  1.1  oster    rf_RegularESubroutine(node, node->results[0]);
    209  1.1  oster    rf_GenericWakeupFunc(node, 0);
    210  1.1  oster #if 1
    211  1.1  oster    return(0); /* XXX this was missing?.. GO */
    212  1.1  oster #endif
    213  1.1  oster }
    214  1.1  oster 
    215  1.1  oster /*******************************************************************************************
    216  1.1  oster  * This degraded function allow only two case:
    217  1.1  oster  *  1. when write access the full failed stripe unit, then the access can be more than
    218  1.1  oster  *     one tripe units.
    219  1.1  oster  *  2. when write access only part of the failed SU, we assume accesses of more than
    220  1.1  oster  *     one stripe unit is not allowed so that the write can be dealt with like a
    221  1.1  oster  *     large write.
    222  1.1  oster  *  The following function is based on these assumptions. So except in the second case,
    223  1.1  oster  *  it looks the same as a large write encodeing function. But this is not exactly the
    224  1.1  oster  *  normal way for doing a degraded write, since raidframe have to break cases of access
    225  1.1  oster  *  other than the above two into smaller accesses. We may have to change
    226  1.1  oster  *  DegrESubroutin in the future.
    227  1.1  oster  *******************************************************************************************/
    228  1.1  oster void rf_DegrESubroutine(node, ebuf)
    229  1.1  oster   RF_DagNode_t  *node;
    230  1.1  oster   char          *ebuf;
    231  1.1  oster {
    232  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
    233  1.1  oster   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
    234  1.1  oster   RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
    235  1.1  oster   RF_PhysDiskAddr_t *pda;
    236  1.1  oster   int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    237  1.1  oster   RF_RowCol_t scol;
    238  1.1  oster   char *srcbuf, *destbuf;
    239  1.1  oster   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    240  1.1  oster   RF_Etimer_t timer;
    241  1.1  oster 
    242  1.1  oster   RF_ETIMER_START(timer);
    243  1.1  oster   for (i=0; i<node->numParams-2; i+=2) {
    244  1.1  oster     RF_ASSERT( node->params[i+1].p != ebuf );
    245  1.1  oster     pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    246  1.1  oster     suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    247  1.1  oster     scol = rf_EUCol(layoutPtr, pda->raidAddress );
    248  1.1  oster     srcbuf = (char *) node->params[i+1].p;
    249  1.1  oster     destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset-failedSUOffset);
    250  1.1  oster     rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
    251  1.1  oster   }
    252  1.1  oster 
    253  1.1  oster   RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
    254  1.1  oster }
    255  1.1  oster 
    256  1.1  oster 
    257  1.1  oster /**************************************************************************************
    258  1.1  oster  * This function is used in case where one data disk failed and both redundant disks
    259  1.1  oster  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
    260  1.1  oster  * failed in the stripe but not accessed at this time, then we should, instead, use
    261  1.1  oster  * the rf_EOWriteDoubleRecoveryFunc().
    262  1.1  oster  **************************************************************************************/
    263  1.1  oster int rf_Degraded_100_EOFunc(node)
    264  1.1  oster   RF_DagNode_t  *node;
    265  1.1  oster {
    266  1.1  oster   rf_DegrESubroutine(node, node->results[1]);
    267  1.1  oster   rf_RecoveryXorFunc(node);  /* does the wakeup here! */
    268  1.1  oster #if 1
    269  1.1  oster   return(0); /* XXX this was missing... SHould these be void functions??? GO */
    270  1.1  oster #endif
    271  1.1  oster }
    272  1.1  oster 
    273  1.1  oster /**************************************************************************************
    274  1.1  oster  * This function is to encode one sector in one of the data disks to the E disk.
    275  1.1  oster  * However, in evenodd this function can also be used as decoding function to recover
    276  1.1  oster  * data from dead disk in the case of parity failure and a single data failure.
    277  1.1  oster  **************************************************************************************/
    278  1.1  oster void rf_e_EncOneSect(
    279  1.1  oster   RF_RowCol_t   srcLogicCol,
    280  1.1  oster   char         *srcSecbuf,
    281  1.1  oster   RF_RowCol_t   destLogicCol,
    282  1.1  oster   char         *destSecbuf,
    283  1.1  oster   int           bytesPerSector)
    284  1.1  oster {
    285  1.1  oster   int S_index;  /* index of the EU in the src col which need be Xored into all EUs in a dest sector */
    286  1.1  oster   int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
    287  1.1  oster   RF_RowCol_t j, indexInDest,   /* row index of an encoding unit in the destination colume of encoding matrix */
    288  1.1  oster               indexInSrc;  /* row index of an encoding unit in the source colume used for recovery */
    289  1.1  oster   int bytesPerEU = bytesPerSector/numRowInEncMatix;
    290  1.1  oster 
    291  1.1  oster #if RF_EO_MATRIX_DIM > 17
    292  1.1  oster   int shortsPerEU = bytesPerEU/sizeof(short);
    293  1.1  oster   short *destShortBuf, *srcShortBuf1, *srcShortBuf2;
    294  1.1  oster   register short temp1;
    295  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    296  1.1  oster   int longsPerEU = bytesPerEU/sizeof(long);
    297  1.1  oster   long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
    298  1.1  oster   register long temp1;
    299  1.1  oster #endif
    300  1.1  oster 
    301  1.1  oster #if RF_EO_MATRIX_DIM > 17
    302  1.1  oster   RF_ASSERT( sizeof(short) == 2 || sizeof(short) == 1 );
    303  1.1  oster   RF_ASSERT( bytesPerEU % sizeof(short) == 0 );
    304  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    305  1.1  oster   RF_ASSERT( sizeof(long) == 8 || sizeof(long) == 4 );
    306  1.1  oster   RF_ASSERT( bytesPerEU % sizeof(long) == 0);
    307  1.1  oster #endif
    308  1.1  oster 
    309  1.1  oster   S_index = rf_EO_Mod( ( RF_EO_MATRIX_DIM -1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
    310  1.1  oster #if RF_EO_MATRIX_DIM > 17
    311  1.1  oster   srcShortBuf1 = (short *)(srcSecbuf + S_index * bytesPerEU);
    312  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    313  1.1  oster   srcLongBuf1 = (long *)(srcSecbuf + S_index * bytesPerEU);
    314  1.1  oster #endif
    315  1.1  oster 
    316  1.1  oster   for( indexInDest = 0; indexInDest < numRowInEncMatix ; indexInDest++){
    317  1.1  oster      indexInSrc = rf_EO_Mod( (indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM );
    318  1.1  oster 
    319  1.1  oster #if RF_EO_MATRIX_DIM > 17
    320  1.1  oster      destShortBuf = (short *)(destSecbuf + indexInDest * bytesPerEU);
    321  1.1  oster      srcShortBuf2 = (short *)(srcSecbuf + indexInSrc * bytesPerEU);
    322  1.1  oster      for(j=0; j < shortsPerEU; j++) {
    323  1.1  oster         temp1 = destShortBuf[j]^srcShortBuf1[j];
    324  1.1  oster         /* note: S_index won't be at the end row for any src col! */
    325  1.1  oster         if(indexInSrc != RF_EO_MATRIX_DIM -1) destShortBuf[j] = (srcShortBuf2[j])^temp1;
    326  1.1  oster         /* if indexInSrc is at the end row, ie. RF_EO_MATRIX_DIM -1, then all elements are zero! */
    327  1.1  oster 	else destShortBuf[j] = temp1;
    328  1.1  oster      }
    329  1.1  oster 
    330  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    331  1.1  oster      destLongBuf = (long *)(destSecbuf + indexInDest * bytesPerEU);
    332  1.1  oster      srcLongBuf2 = (long *)(srcSecbuf + indexInSrc * bytesPerEU);
    333  1.1  oster      for(j=0; j < longsPerEU; j++) {
    334  1.1  oster         temp1 = destLongBuf[j]^srcLongBuf1[j];
    335  1.1  oster         if(indexInSrc != RF_EO_MATRIX_DIM -1) destLongBuf[j] = (srcLongBuf2[j])^temp1;
    336  1.1  oster         else destLongBuf[j] = temp1;
    337  1.1  oster      }
    338  1.1  oster #endif
    339  1.1  oster   }
    340  1.1  oster }
    341  1.1  oster 
    342  1.1  oster void rf_e_encToBuf(
    343  1.1  oster   RF_Raid_t    *raidPtr,
    344  1.1  oster   RF_RowCol_t   srcLogicCol,
    345  1.1  oster   char         *srcbuf,
    346  1.1  oster   RF_RowCol_t   destLogicCol,
    347  1.1  oster   char         *destbuf,
    348  1.1  oster   int           numSector)
    349  1.1  oster {
    350  1.1  oster   int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
    351  1.1  oster 
    352  1.1  oster   for (i=0; i < numSector; i++)
    353  1.1  oster   {
    354  1.1  oster      rf_e_EncOneSect( srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
    355  1.1  oster      srcbuf += bytesPerSector;
    356  1.1  oster      destbuf += bytesPerSector;
    357  1.1  oster   }
    358  1.1  oster }
    359  1.1  oster 
    360  1.1  oster /**************************************************************************************
    361  1.1  oster  * when parity die and one data die, We use second redundant information, 'E',
    362  1.1  oster  * to recover the data in dead disk. This function is used in the recovery node of
    363  1.1  oster  * for EO_110_CreateReadDAG
    364  1.1  oster  **************************************************************************************/
    365  1.1  oster int rf_RecoveryEFunc(node)
    366  1.1  oster   RF_DagNode_t  *node;
    367  1.1  oster {
    368  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
    369  1.1  oster   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
    370  1.1  oster   RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
    371  1.1  oster   RF_RowCol_t scol, /*source logical column*/
    372  1.1  oster               fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress ); /* logical column of failed SU */
    373  1.1  oster   int i;
    374  1.1  oster   RF_PhysDiskAddr_t *pda;
    375  1.1  oster   int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector);
    376  1.1  oster   char *srcbuf, *destbuf;
    377  1.1  oster   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    378  1.1  oster   RF_Etimer_t timer;
    379  1.1  oster 
    380  1.1  oster   bzero( (char *)node->results[0], rf_RaidAddressToByte(raidPtr,failedPDA->numSector));
    381  1.1  oster   if (node->dagHdr->status == rf_enable) {
    382  1.1  oster     RF_ETIMER_START(timer);
    383  1.1  oster     for (i=0; i<node->numParams-2; i+=2) if (node->params[i+1].p != node->results[0]) {
    384  1.1  oster       pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    385  1.1  oster       if( i == node->numParams - 4 ) scol = RF_EO_MATRIX_DIM - 2; /* the colume of redundant E */
    386  1.1  oster       else scol = rf_EUCol(layoutPtr, pda->raidAddress );
    387  1.1  oster       srcbuf = (char *) node->params[i+1].p;
    388  1.1  oster       suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    389  1.1  oster       destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset);
    390  1.1  oster       rf_e_encToBuf(raidPtr,  scol, srcbuf, fcol, destbuf, pda->numSector);
    391  1.1  oster     }
    392  1.1  oster     RF_ETIMER_STOP(timer);
    393  1.1  oster     RF_ETIMER_EVAL(timer);
    394  1.1  oster     tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    395  1.1  oster   }
    396  1.1  oster   return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
    397  1.1  oster }
    398  1.1  oster 
    399  1.1  oster /**************************************************************************************
    400  1.1  oster  * This function is used in the case where one data and the parity have filed.
    401  1.1  oster  * (in EO_110_CreateWriteDAG )
    402  1.1  oster  **************************************************************************************/
    403  1.1  oster int rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
    404  1.1  oster {
    405  1.1  oster   rf_DegrESubroutine(node, node->results[0]);
    406  1.1  oster   rf_GenericWakeupFunc(node, 0);
    407  1.1  oster #if 1
    408  1.1  oster   return(0); /* XXX Yet another one!! GO */
    409  1.1  oster #endif
    410  1.1  oster }
    411  1.1  oster 
    412  1.1  oster 
    413  1.1  oster 
    414  1.1  oster /**************************************************************************************
    415  1.1  oster  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
    416  1.1  oster  **************************************************************************************/
    417  1.1  oster 
    418  1.1  oster void rf_doubleEOdecode(
    419  1.1  oster   RF_Raid_t     *raidPtr,
    420  1.1  oster   char         **rrdbuf,
    421  1.1  oster   char         **dest,
    422  1.1  oster   RF_RowCol_t   *fcol,
    423  1.1  oster   char          *pbuf,
    424  1.1  oster   char          *ebuf)
    425  1.1  oster {
    426  1.1  oster   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
    427  1.1  oster   int i, j, k, f1, f2, row;
    428  1.1  oster   int rrdrow, erow, count = 0;
    429  1.1  oster   int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
    430  1.1  oster   int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
    431  1.1  oster #if 0
    432  1.1  oster   int pcol = (RF_EO_MATRIX_DIM) - 1;
    433  1.1  oster #endif
    434  1.1  oster   int ecol = (RF_EO_MATRIX_DIM) - 2;
    435  1.1  oster   int bytesPerEU = bytesPerSector/numRowInEncMatix;
    436  1.1  oster   int numDataCol  = layoutPtr->numDataCol;
    437  1.1  oster #if RF_EO_MATRIX_DIM > 17
    438  1.1  oster   int shortsPerEU = bytesPerEU/sizeof(short);
    439  1.1  oster   short *rrdbuf_current, *pbuf_current, *ebuf_current;
    440  1.1  oster   short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    441  1.1  oster   register short *temp;
    442  1.1  oster   short *P;
    443  1.1  oster 
    444  1.1  oster   RF_ASSERT( bytesPerEU % sizeof(short) == 0);
    445  1.1  oster   RF_Malloc(P, bytesPerEU, (short *));
    446  1.1  oster   RF_Malloc(temp, bytesPerEU, (short *));
    447  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    448  1.1  oster   int longsPerEU = bytesPerEU/sizeof(long);
    449  1.1  oster   long *rrdbuf_current, *pbuf_current, *ebuf_current;
    450  1.1  oster   long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    451  1.1  oster   register long *temp;
    452  1.1  oster   long *P;
    453  1.1  oster 
    454  1.1  oster   RF_ASSERT( bytesPerEU % sizeof(long) == 0);
    455  1.1  oster   RF_Malloc(P, bytesPerEU, (long *));
    456  1.1  oster   RF_Malloc(temp, bytesPerEU, (long *));
    457  1.1  oster #endif
    458  1.1  oster   RF_ASSERT( *((long *)dest[0]) == 0);
    459  1.1  oster   RF_ASSERT( *((long *)dest[1]) == 0);
    460  1.1  oster   bzero((char *)P, bytesPerEU);
    461  1.1  oster   bzero((char *)temp, bytesPerEU);
    462  1.1  oster   RF_ASSERT( *P == 0 );
    463  1.1  oster   /* calculate the 'P' parameter, which, not parity, is the Xor of all elements in
    464  1.1  oster      the last two column, ie. 'E' and 'parity' colume, see the Ref. paper by Blaum, et al 1993  */
    465  1.1  oster   for( i=0; i< numRowInEncMatix; i++)
    466  1.1  oster        for( k=0; k< longsPerEU; k++) {
    467  1.1  oster #if RF_EO_MATRIX_DIM > 17
    468  1.1  oster             ebuf_current = ((short *)ebuf) + i*shortsPerEU + k;
    469  1.1  oster             pbuf_current = ((short *)pbuf) + i*shortsPerEU + k;
    470  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    471  1.1  oster             ebuf_current = ((long *)ebuf) + i*longsPerEU + k;
    472  1.1  oster             pbuf_current = ((long *)pbuf) + i*longsPerEU + k;
    473  1.1  oster #endif
    474  1.1  oster             P[k] ^= *ebuf_current;
    475  1.1  oster             P[k] ^= *pbuf_current;
    476  1.1  oster        }
    477  1.1  oster   RF_ASSERT( fcol[0] != fcol[1] );
    478  1.1  oster   if( fcol[0] < fcol[1] ) {
    479  1.1  oster #if RF_EO_MATRIX_DIM > 17
    480  1.1  oster         dest_smaller = (short *)(dest[0]);
    481  1.1  oster         dest_larger = (short *)(dest[1]);
    482  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    483  1.1  oster 	dest_smaller = (long *)(dest[0]);
    484  1.1  oster 	dest_larger = (long *)(dest[1]);
    485  1.1  oster #endif
    486  1.1  oster 	f1 = fcol[0];
    487  1.1  oster 	f2 = fcol[1];
    488  1.1  oster   }
    489  1.1  oster   else {
    490  1.1  oster #if RF_EO_MATRIX_DIM > 17
    491  1.1  oster         dest_smaller = (short *)(dest[1]);
    492  1.1  oster         dest_larger = (short *)(dest[0]);
    493  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    494  1.1  oster 	dest_smaller = (long *)(dest[1]);
    495  1.1  oster 	dest_larger = (long *)(dest[0]);
    496  1.1  oster #endif
    497  1.1  oster 	f1 = fcol[1];
    498  1.1  oster 	f2 = fcol[0];
    499  1.1  oster   }
    500  1.1  oster   row = (RF_EO_MATRIX_DIM) -1;
    501  1.1  oster   while( (row = rf_EO_Mod( (row+f1-f2), RF_EO_MATRIX_DIM )) != ( (RF_EO_MATRIX_DIM) -1) )
    502  1.1  oster   {
    503  1.1  oster #if RF_EO_MATRIX_DIM > 17
    504  1.1  oster        dest_larger_current = dest_larger + row*shortsPerEU;
    505  1.1  oster        dest_smaller_current = dest_smaller + row*shortsPerEU;
    506  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    507  1.1  oster        dest_larger_current = dest_larger + row*longsPerEU;
    508  1.1  oster        dest_smaller_current = dest_smaller + row*longsPerEU;
    509  1.1  oster #endif
    510  1.1  oster        /**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
    511  1.1  oster 	      which is the failed data in the colume which has smaller col index. **/
    512  1.1  oster        /*   step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
    513  1.1  oster        for( j=0; j< numDataCol; j++)
    514  1.1  oster        {
    515  1.1  oster              if( j == f1 || j == f2 ) continue;
    516  1.1  oster              rrdrow = rf_EO_Mod( (row+f2-j), RF_EO_MATRIX_DIM );
    517  1.1  oster 	     if ( rrdrow != (RF_EO_MATRIX_DIM) -1 ) {
    518  1.1  oster #if RF_EO_MATRIX_DIM > 17
    519  1.1  oster                  rrdbuf_current = (short *)(rrdbuf[j]) + rrdrow * shortsPerEU;
    520  1.1  oster                  for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
    521  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    522  1.1  oster 	         rrdbuf_current = (long *)(rrdbuf[j]) + rrdrow * longsPerEU;
    523  1.1  oster                  for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
    524  1.1  oster #endif
    525  1.1  oster 	     }
    526  1.1  oster        }
    527  1.1  oster        /*   step 2:  ^E(erow,m-2), If erow is at the buttom row, don't Xor into it
    528  1.1  oster 	    E(erow,m-2) = (principle diagonal) ^ (failed 1) ^ (failed 2)
    529  1.1  oster                         ^ ( SUM of nonfailed in-diagonal A(rrdrow,0..m-3) )
    530  1.1  oster             After this step, temp[k] = (principle diagonal) ^ (failed 2)       */
    531  1.1  oster 
    532  1.1  oster        erow = rf_EO_Mod( (row+f2-ecol), (RF_EO_MATRIX_DIM) );
    533  1.1  oster        if ( erow != (RF_EO_MATRIX_DIM) -1) {
    534  1.1  oster #if RF_EO_MATRIX_DIM > 17
    535  1.1  oster            ebuf_current = (short *)ebuf + shortsPerEU * erow;
    536  1.1  oster            for (k=0; k< shortsPerEU; k++) temp[k] ^= *(ebuf_current+k);
    537  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    538  1.1  oster            ebuf_current = (long *)ebuf + longsPerEU * erow;
    539  1.1  oster            for (k=0; k< longsPerEU; k++) temp[k] ^= *(ebuf_current+k);
    540  1.1  oster #endif
    541  1.1  oster        }
    542  1.1  oster        /*   step 3: ^P to obtain the failed data (failed 2).
    543  1.1  oster 	    P can be proved to be actually  (principle diagonal)
    544  1.1  oster             After this step, temp[k] = (failed 2), the failed data to be recovered */
    545  1.1  oster #if RF_EO_MATRIX_DIM > 17
    546  1.1  oster        for (k=0; k< shortsPerEU; k++) temp[k] ^= P[k];
    547  1.1  oster        /*   Put the data to the destination buffer                              */
    548  1.1  oster        for (k=0; k< shortsPerEU; k++) dest_larger_current[k] = temp[k];
    549  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    550  1.1  oster        for (k=0; k< longsPerEU; k++) temp[k] ^= P[k];
    551  1.1  oster        /*   Put the data to the destination buffer                              */
    552  1.1  oster        for (k=0; k< longsPerEU; k++) dest_larger_current[k] = temp[k];
    553  1.1  oster #endif
    554  1.1  oster 
    555  1.1  oster        /**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
    556  1.1  oster        /*   step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data columes    */
    557  1.1  oster        for (j=0; j< numDataCol; j++)
    558  1.1  oster        {
    559  1.1  oster              if( j == f1 || j == f2 ) continue;
    560  1.1  oster #if RF_EO_MATRIX_DIM > 17
    561  1.1  oster              rrdbuf_current = (short *)(rrdbuf[j]) + row * shortsPerEU;
    562  1.1  oster              for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
    563  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    564  1.1  oster 	     rrdbuf_current = (long *)(rrdbuf[j]) + row * longsPerEU;
    565  1.1  oster              for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
    566  1.1  oster #endif
    567  1.1  oster        }
    568  1.1  oster        /*   step 2: ^A(row,m-1) */
    569  1.1  oster        /*   step 3: Put the data to the destination buffer                             	*/
    570  1.1  oster #if RF_EO_MATRIX_DIM > 17
    571  1.1  oster        pbuf_current = (short *)pbuf + shortsPerEU * row;
    572  1.1  oster        for (k=0; k< shortsPerEU; k++) temp[k] ^= *(pbuf_current+k);
    573  1.1  oster        for (k=0; k< shortsPerEU; k++) dest_smaller_current[k] = temp[k];
    574  1.1  oster #elif RF_EO_MATRIX_DIM == 17
    575  1.1  oster        pbuf_current = (long *)pbuf + longsPerEU * row;
    576  1.1  oster        for (k=0; k< longsPerEU; k++) temp[k] ^= *(pbuf_current+k);
    577  1.1  oster        for (k=0; k< longsPerEU; k++) dest_smaller_current[k] = temp[k];
    578  1.1  oster #endif
    579  1.1  oster        count++;
    580  1.1  oster   }
    581  1.1  oster   /*        Check if all Encoding Unit in the data buffer have been decoded,
    582  1.1  oster 	    according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
    583  1.1  oster 	    this algorithm will covered all buffer 				 */
    584  1.1  oster   RF_ASSERT( count == numRowInEncMatix );
    585  1.1  oster   RF_Free((char *)P, bytesPerEU);
    586  1.1  oster   RF_Free((char *)temp, bytesPerEU);
    587  1.1  oster }
    588  1.1  oster 
    589  1.1  oster 
    590  1.1  oster /***************************************************************************************
    591  1.1  oster * 	This function is called by double degragded read
    592  1.1  oster * 	EO_200_CreateReadDAG
    593  1.1  oster *
    594  1.1  oster ***************************************************************************************/
    595  1.1  oster int rf_EvenOddDoubleRecoveryFunc(node)
    596  1.1  oster   RF_DagNode_t  *node;
    597  1.1  oster {
    598  1.1  oster   int ndataParam = 0;
    599  1.1  oster   int np = node->numParams;
    600  1.1  oster   RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
    601  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
    602  1.1  oster   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
    603  1.1  oster   int i, prm, sector, nresults = node->numResults;
    604  1.1  oster   RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    605  1.1  oster   unsigned sosAddr;
    606  1.1  oster   int two = 0, mallc_one= 0, mallc_two = 0;    /* flags to indicate if memory is allocated */
    607  1.1  oster   int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
    608  1.1  oster   RF_PhysDiskAddr_t *ppda,*ppda2,*epda,*epda2,*pda, *pda0, *pda1, npda;
    609  1.1  oster   RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
    610  1.1  oster   char **buf, *ebuf, *pbuf, *dest[2];
    611  1.1  oster   long *suoff=NULL, *suend=NULL, *prmToCol=NULL, psuoff, esuoff;
    612  1.1  oster   RF_SectorNum_t startSector, endSector;
    613  1.1  oster   RF_Etimer_t timer;
    614  1.1  oster   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    615  1.1  oster 
    616  1.1  oster   RF_ETIMER_START(timer);
    617  1.1  oster 
    618  1.1  oster   /* Find out the number of parameters which are pdas for data information */
    619  1.1  oster   for (i = 0; i<= np; i++)
    620  1.1  oster      if( ((RF_PhysDiskAddr_t *)node->params[i].p)->type != RF_PDA_TYPE_DATA) {ndataParam = i ; break; }
    621  1.1  oster 
    622  1.1  oster   RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
    623  1.1  oster   if (ndataParam != 0 ){
    624  1.1  oster       RF_Malloc(suoff, ndataParam*sizeof(long), (long *) );
    625  1.1  oster       RF_Malloc(suend, ndataParam*sizeof(long), (long *) );
    626  1.1  oster       RF_Malloc(prmToCol, ndataParam*sizeof(long), (long *) );
    627  1.1  oster   }
    628  1.1  oster 
    629  1.1  oster   if (asmap->failedPDAs[1] &&
    630  1.1  oster       (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    631  1.1  oster       RF_ASSERT(0); /* currently, no support for this situation */
    632  1.1  oster       ppda  = node->params[np-6].p;
    633  1.1  oster       ppda2 = node->params[np-5].p;
    634  1.1  oster       RF_ASSERT( ppda2->type == RF_PDA_TYPE_PARITY );
    635  1.1  oster       epda  = node->params[np-4].p;
    636  1.1  oster       epda2 = node->params[np-3].p;
    637  1.1  oster       RF_ASSERT( epda2->type == RF_PDA_TYPE_Q );
    638  1.1  oster       two = 1;
    639  1.1  oster   }
    640  1.1  oster   else {
    641  1.1  oster       ppda = node->params[np-4].p;
    642  1.1  oster       epda = node->params[np-3].p;
    643  1.1  oster       psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    644  1.1  oster       esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
    645  1.1  oster       RF_ASSERT( psuoff == esuoff );
    646  1.1  oster   }
    647  1.1  oster   /*
    648  1.1  oster       the followings have three goals:
    649  1.1  oster       1. determine the startSector to begin decoding and endSector to end decoding.
    650  1.1  oster       2. determine the colume numbers of the two failed disks.
    651  1.1  oster       3. determine the offset and end offset of the access within each failed stripe unit.
    652  1.1  oster    */
    653  1.1  oster   if( nresults == 1 ) {
    654  1.1  oster       /* find the startSector to begin decoding */
    655  1.1  oster       pda = node->results[0];
    656  1.1  oster       bzero(pda->bufPtr, bytesPerSector*pda->numSector );
    657  1.1  oster       fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector );
    658  1.1  oster       fsuend[0] = fsuoff[0] + pda->numSector;
    659  1.1  oster       startSector = fsuoff[0];
    660  1.1  oster       endSector = fsuend[0];
    661  1.1  oster 
    662  1.1  oster       /* find out the the column of failed disk being accessed */
    663  1.1  oster       fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress );
    664  1.1  oster 
    665  1.1  oster       /* find out the other failed colume not accessed */
    666  1.1  oster       sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    667  1.1  oster       for (i=0; i < numDataCol; i++) {
    668  1.1  oster           npda.raidAddress = sosAddr + (i * secPerSU);
    669  1.1  oster           (raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    670  1.1  oster           /* skip over dead disks */
    671  1.1  oster           if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    672  1.1  oster               if (i != fcol[0]) break;
    673  1.1  oster       }
    674  1.1  oster       RF_ASSERT (i < numDataCol);
    675  1.1  oster       fcol[1] = i;
    676  1.1  oster   }
    677  1.1  oster   else {
    678  1.1  oster       RF_ASSERT (  nresults == 2 );
    679  1.1  oster       pda0 = node->results[0];  bzero(pda0->bufPtr, bytesPerSector*pda0->numSector );
    680  1.1  oster       pda1 = node->results[1];  bzero(pda1->bufPtr, bytesPerSector*pda1->numSector );
    681  1.1  oster       /* determine the failed colume numbers of the two failed disks. */
    682  1.1  oster       fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress );
    683  1.1  oster       fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress );
    684  1.1  oster       /*  determine the offset and end offset of the access within each failed stripe unit. */
    685  1.1  oster       fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector );
    686  1.1  oster       fsuend[0] = fsuoff[0] + pda0->numSector;
    687  1.1  oster       fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector );
    688  1.1  oster       fsuend[1] = fsuoff[1] + pda1->numSector;
    689  1.1  oster       /*  determine the startSector to begin decoding */
    690  1.1  oster       startSector = RF_MIN( pda0->startSector, pda1->startSector );
    691  1.1  oster       /*  determine the endSector to end decoding */
    692  1.1  oster       endSector = RF_MAX( fsuend[0], fsuend[1] );
    693  1.1  oster   }
    694  1.1  oster   /*
    695  1.1  oster 	assign the beginning sector and the end sector for each parameter
    696  1.1  oster 	find out the corresponding colume # for each parameter
    697  1.1  oster   */
    698  1.1  oster   for( prm=0; prm < ndataParam; prm++ ) {
    699  1.1  oster       pda = node->params[prm].p;
    700  1.1  oster       suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    701  1.1  oster       suend[prm] = suoff[prm] + pda->numSector;
    702  1.1  oster       prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress );
    703  1.1  oster   }
    704  1.1  oster   /* 'sector' is the sector for the current decoding algorithm. For each sector in the failed SU,
    705  1.1  oster      find out the corresponding parameters that cover the current sector and that are needed for
    706  1.1  oster      decoding of this sector in failed SU. 2.  Find out if sector is in the shadow of any accessed
    707  1.1  oster      failed SU. If not, malloc a temporary space of a sector in size.
    708  1.1  oster   */
    709  1.1  oster   for( sector = startSector; sector < endSector; sector++ ){
    710  1.1  oster      if ( nresults == 2 )
    711  1.1  oster 	  if( !(fsuoff[0]<=sector && sector<fsuend[0]) && !(fsuoff[1]<=sector && sector<fsuend[1]) )continue;
    712  1.1  oster      for( prm=0; prm < ndataParam; prm++ )
    713  1.1  oster           if( suoff[prm] <= sector && sector < suend[prm] )
    714  1.1  oster                buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)node->params[prm].p)->bufPtr +
    715  1.1  oster 					rf_RaidAddressToByte(raidPtr, sector-suoff[prm]);
    716  1.1  oster      /* find out if sector is in the shadow of any accessed failed SU. If yes, assign dest[0], dest[1] to point
    717  1.1  oster 	 at suitable position of the buffer corresponding to failed SUs. if no, malloc a temporary space of
    718  1.1  oster 	 a sector in size for destination of decoding.
    719  1.1  oster       */
    720  1.1  oster      RF_ASSERT( nresults == 1 || nresults == 2 );
    721  1.1  oster      if ( nresults == 1) {
    722  1.1  oster            dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
    723  1.1  oster            /* Always malloc temp buffer to dest[1]  */
    724  1.1  oster            RF_Malloc( dest[1], bytesPerSector, (char *) );
    725  1.1  oster 	   bzero(dest[1],bytesPerSector); mallc_two = 1; }
    726  1.1  oster       else {
    727  1.1  oster            if( fsuoff[0] <= sector && sector < fsuend[0] )
    728  1.1  oster                   dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
    729  1.1  oster            else { RF_Malloc( dest[0], bytesPerSector, (char *) );
    730  1.1  oster 		  bzero(dest[0],bytesPerSector); mallc_one = 1; }
    731  1.1  oster            if( fsuoff[1] <= sector && sector < fsuend[1] )
    732  1.1  oster                   dest[1] = ((RF_PhysDiskAddr_t *)node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[1]);
    733  1.1  oster            else { RF_Malloc( dest[1], bytesPerSector, (char *) );
    734  1.1  oster                   bzero(dest[1],bytesPerSector); mallc_two = 1; }
    735  1.1  oster            RF_ASSERT( mallc_one == 0 || mallc_two == 0 );
    736  1.1  oster       }
    737  1.1  oster       pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-psuoff );
    738  1.1  oster       ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-esuoff );
    739  1.1  oster       /*
    740  1.1  oster        * After finish finding all needed sectors, call doubleEOdecode function for decoding
    741  1.1  oster        * one sector to destination.
    742  1.1  oster        */
    743  1.1  oster       rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
    744  1.1  oster       /* free all allocated memory, and mark flag to indicate no memory is being allocated */
    745  1.1  oster       if( mallc_one == 1) RF_Free( dest[0], bytesPerSector );
    746  1.1  oster       if( mallc_two == 1) RF_Free( dest[1], bytesPerSector );
    747  1.1  oster       mallc_one = mallc_two = 0;
    748  1.1  oster   }
    749  1.1  oster   RF_Free(buf, numDataCol*sizeof(char *));
    750  1.1  oster   if (ndataParam != 0){
    751  1.1  oster       RF_Free(suoff, ndataParam*sizeof(long));
    752  1.1  oster       RF_Free(suend, ndataParam*sizeof(long));
    753  1.1  oster       RF_Free(prmToCol, ndataParam*sizeof(long));
    754  1.1  oster   }
    755  1.1  oster 
    756  1.1  oster   RF_ETIMER_STOP(timer);
    757  1.1  oster   RF_ETIMER_EVAL(timer);
    758  1.1  oster   if (tracerec) {
    759  1.1  oster     tracerec->q_us += RF_ETIMER_VAL_US(timer);
    760  1.1  oster   }
    761  1.1  oster   rf_GenericWakeupFunc(node,0);
    762  1.1  oster #if 1
    763  1.1  oster   return(0); /* XXX is this even close!!?!?!!? GO */
    764  1.1  oster #endif
    765  1.1  oster }
    766  1.1  oster 
    767  1.1  oster 
    768  1.1  oster /* currently, only access of one of the two failed SU is allowed in this function.
    769  1.1  oster  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
    770  1.1  oster  * many accesses of single stripe unit.
    771  1.1  oster  */
    772  1.1  oster 
    773  1.1  oster int rf_EOWriteDoubleRecoveryFunc(node)
    774  1.1  oster   RF_DagNode_t  *node;
    775  1.1  oster {
    776  1.1  oster   int np = node->numParams;
    777  1.1  oster   RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
    778  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
    779  1.1  oster   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
    780  1.1  oster   RF_SectorNum_t sector;
    781  1.1  oster   RF_RowCol_t col, scol;
    782  1.1  oster   int prm, i, j;
    783  1.1  oster   RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    784  1.1  oster   unsigned sosAddr;
    785  1.1  oster   unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
    786  1.1  oster   RF_int64 numbytes;
    787  1.1  oster   RF_SectorNum_t startSector, endSector;
    788  1.1  oster   RF_PhysDiskAddr_t *ppda,*epda,*pda, *fpda, npda;
    789  1.1  oster   RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
    790  1.1  oster   char **buf; /* buf[0], buf[1], buf[2], ...etc. point to buffer storing data read from col0, col1, col2 */
    791  1.1  oster   char *ebuf, *pbuf, *dest[2], *olddata[2];
    792  1.1  oster   RF_Etimer_t timer;
    793  1.1  oster   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    794  1.1  oster 
    795  1.1  oster   RF_ASSERT( asmap->numDataFailed == 1 ); /* currently only support this case, the other failed SU is not being accessed */
    796  1.1  oster   RF_ETIMER_START(timer);
    797  1.1  oster   RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
    798  1.1  oster 
    799  1.1  oster   ppda = node->results[0];            /* Instead of being buffers, node->results[0] and [1] are Ppda and Epda  */
    800  1.1  oster   epda = node->results[1];
    801  1.1  oster   fpda = asmap->failedPDAs[0];
    802  1.1  oster 
    803  1.1  oster   /* First, recovery the failed old SU using EvenOdd double decoding      */
    804  1.1  oster   /* determine the startSector and endSector for decoding */
    805  1.1  oster   startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector );
    806  1.1  oster   endSector = startSector + fpda->numSector;
    807  1.1  oster   /* Assign buf[col] pointers to point to each non-failed colume  and initialize the pbuf
    808  1.1  oster      and ebuf to point at the beginning of each source buffers and destination buffers */
    809  1.1  oster   for( prm=0; prm < numDataCol-2; prm++ ) {
    810  1.1  oster       pda = (RF_PhysDiskAddr_t *)node->params[prm].p;
    811  1.1  oster       col = rf_EUCol(layoutPtr, pda->raidAddress );
    812  1.1  oster       buf[col] = pda->bufPtr;
    813  1.1  oster   }
    814  1.1  oster   /*  pbuf and ebuf:  they will change values as double recovery decoding goes on */
    815  1.1  oster   pbuf = ppda->bufPtr;
    816  1.1  oster   ebuf = epda->bufPtr;
    817  1.1  oster   /* find out the logical colume numbers in the encoding matrix of the two failed columes */
    818  1.1  oster   fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress );
    819  1.1  oster 
    820  1.1  oster   /* find out the other failed colume not accessed this time */
    821  1.1  oster   sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    822  1.1  oster   for (i=0; i < numDataCol; i++) {
    823  1.1  oster       npda.raidAddress = sosAddr + (i * secPerSU);
    824  1.1  oster       (raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    825  1.1  oster       /* skip over dead disks */
    826  1.1  oster       if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    827  1.1  oster       if (i != fcol[0]) break;
    828  1.1  oster   }
    829  1.1  oster   RF_ASSERT (i < numDataCol);
    830  1.1  oster   fcol[1] = i;
    831  1.1  oster   /* assign temporary space to put recovered failed SU */
    832  1.1  oster   numbytes = fpda->numSector * bytesPerSector;
    833  1.1  oster   RF_Malloc(olddata[0], numbytes, (char *) );
    834  1.1  oster   RF_Malloc(olddata[1], numbytes, (char *) );
    835  1.1  oster   dest[0] = olddata[0];
    836  1.1  oster   dest[1] = olddata[1];
    837  1.1  oster   bzero(olddata[0], numbytes);
    838  1.1  oster   bzero(olddata[1], numbytes);
    839  1.1  oster   /* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j] have already
    840  1.1  oster      pointed at the beginning of each source buffers and destination buffers */
    841  1.1  oster   for( sector = startSector, i=0; sector < endSector; sector++ , i++){
    842  1.1  oster       rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
    843  1.1  oster       for (j=0; j < numDataCol; j++)
    844  1.1  oster            if( ( j != fcol[0]) && ( j != fcol[1] ) ) buf[j] += bytesPerSector;
    845  1.1  oster       dest[0] += bytesPerSector;
    846  1.1  oster       dest[1] += bytesPerSector;
    847  1.1  oster       ebuf += bytesPerSector;
    848  1.1  oster       pbuf += bytesPerSector;
    849  1.1  oster   }
    850  1.1  oster   /* after recovery, the buffer pointed by olddata[0] is the old failed data.
    851  1.1  oster      With new writing data and this old data, use small write to calculate
    852  1.1  oster      the new redundant informations
    853  1.1  oster    */
    854  1.1  oster   /*  node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of Rrd;
    855  1.1  oster             params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ;
    856  1.1  oster             params[ PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1]
    857  1.1  oster       	            are Pdas of wudNodes;
    858  1.1  oster       For current implementation, we assume the simplest case:
    859  1.1  oster            asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 ie. PDAPerDisk = 1
    860  1.1  oster       then node->params[numDataCol] must be the new data to be writen to the failed disk. We first bxor the new data
    861  1.1  oster       into the old recovered data, then do the same things as small write.
    862  1.1  oster    */
    863  1.1  oster 
    864  1.1  oster   rf_bxor( ((RF_PhysDiskAddr_t *)node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
    865  1.1  oster   /*  do new 'E' calculation  */
    866  1.1  oster   /*  find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
    867  1.1  oster   scol = rf_EUCol(layoutPtr, fpda->raidAddress );
    868  1.1  oster   /*  olddata[0] now is source buffer pointer; epda->bufPtr is the dest buffer pointer               */
    869  1.1  oster   rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
    870  1.1  oster 
    871  1.1  oster   /*  do new 'P' calculation  */
    872  1.1  oster    rf_bxor( olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
    873  1.1  oster   /* Free the allocated buffer  */
    874  1.1  oster   RF_Free( olddata[0], numbytes );
    875  1.1  oster   RF_Free( olddata[1], numbytes );
    876  1.1  oster   RF_Free( buf, numDataCol*sizeof(char *));
    877  1.1  oster 
    878  1.1  oster   RF_ETIMER_STOP(timer);
    879  1.1  oster   RF_ETIMER_EVAL(timer);
    880  1.1  oster   if (tracerec) {
    881  1.1  oster     tracerec->q_us += RF_ETIMER_VAL_US(timer);
    882  1.1  oster   }
    883  1.1  oster 
    884  1.1  oster   rf_GenericWakeupFunc(node,0);
    885  1.1  oster   return(0);
    886  1.1  oster }
    887