Home | History | Annotate | Line # | Download | only in raidframe
rf_pq.c revision 1.1
      1 /*	$NetBSD: rf_pq.c,v 1.1 1998/11/13 04:20:32 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Daniel Stodolsky
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID level 6 (P + Q) disk array architecture.
     31  *
     32  * :
     33  * Log: rf_pq.c,v
     34  * Revision 1.33  1996/11/05 21:10:40  jimz
     35  * failed pda generalization
     36  *
     37  * Revision 1.32  1996/07/31  16:29:50  jimz
     38  * "fix" math on 32-bit machines using RF_LONGSHIFT
     39  * (may be incorrect)
     40  *
     41  * Revision 1.31  1996/07/31  15:35:01  jimz
     42  * evenodd changes; bugfixes for double-degraded archs, generalize
     43  * some formerly PQ-only functions
     44  *
     45  * Revision 1.30  1996/07/27  23:36:08  jimz
     46  * Solaris port of simulator
     47  *
     48  * Revision 1.29  1996/07/22  19:52:16  jimz
     49  * switched node params to RF_DagParam_t, a union of
     50  * a 64-bit int and a void *, for better portability
     51  * attempted hpux port, but failed partway through for
     52  * lack of a single C compiler capable of compiling all
     53  * source files
     54  *
     55  * Revision 1.28  1996/06/09  02:36:46  jimz
     56  * lots of little crufty cleanup- fixup whitespace
     57  * issues, comment #ifdefs, improve typing in some
     58  * places (esp size-related)
     59  *
     60  * Revision 1.27  1996/06/07  21:33:04  jimz
     61  * begin using consistent types for sector numbers,
     62  * stripe numbers, row+col numbers, recon unit numbers
     63  *
     64  * Revision 1.26  1996/06/02  17:31:48  jimz
     65  * Moved a lot of global stuff into array structure, where it belongs.
     66  * Fixed up paritylogging, pss modules in this manner. Some general
     67  * code cleanup. Removed lots of dead code, some dead files.
     68  *
     69  * Revision 1.25  1996/05/31  22:26:54  jimz
     70  * fix a lot of mapping problems, memory allocation problems
     71  * found some weird lock issues, fixed 'em
     72  * more code cleanup
     73  *
     74  * Revision 1.24  1996/05/30  23:22:16  jimz
     75  * bugfixes of serialization, timing problems
     76  * more cleanup
     77  *
     78  * Revision 1.23  1996/05/30  12:59:18  jimz
     79  * make etimer happier, more portable
     80  *
     81  * Revision 1.22  1996/05/27  18:56:37  jimz
     82  * more code cleanup
     83  * better typing
     84  * compiles in all 3 environments
     85  *
     86  * Revision 1.21  1996/05/24  22:17:04  jimz
     87  * continue code + namespace cleanup
     88  * typed a bunch of flags
     89  *
     90  * Revision 1.20  1996/05/24  04:28:55  jimz
     91  * release cleanup ckpt
     92  *
     93  * Revision 1.19  1996/05/23  21:46:35  jimz
     94  * checkpoint in code cleanup (release prep)
     95  * lots of types, function names have been fixed
     96  *
     97  * Revision 1.18  1996/05/23  00:33:23  jimz
     98  * code cleanup: move all debug decls to rf_options.c, all extern
     99  * debug decls to rf_options.h, all debug vars preceded by rf_
    100  *
    101  * Revision 1.17  1996/05/18  19:51:34  jimz
    102  * major code cleanup- fix syntax, make some types consistent,
    103  * add prototypes, clean out dead code, et cetera
    104  *
    105  * Revision 1.16  1996/05/17  14:52:04  wvcii
    106  * added prototyping to QDelta()
    107  *   - changed buf params from volatile unsigned long * to char *
    108  * changed QDelta for kernel
    109  *   - just bzero the buf since kernel doesn't include pq decode table
    110  *
    111  * Revision 1.15  1996/05/03  19:40:20  wvcii
    112  * added includes for dag library
    113  *
    114  * Revision 1.14  1995/12/12  18:10:06  jimz
    115  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
    116  * fix 80-column brain damage in comments
    117  *
    118  * Revision 1.13  1995/11/30  16:19:55  wvcii
    119  * added copyright info
    120  *
    121  * Revision 1.12  1995/11/07  16:13:47  wvcii
    122  * changed PQDagSelect prototype
    123  * function no longer returns numHdrSucc, numTermAnt
    124  * note:  this file contains node functions which should be
    125  * moved to rf_dagfuncs.c so that all node funcs are bundled together
    126  *
    127  * Revision 1.11  1995/10/04  03:50:33  wvcii
    128  * removed panics, minor code cleanup in dag selection
    129  *
    130  *
    131  */
    132 
    133 #include "rf_archs.h"
    134 #include "rf_types.h"
    135 #include "rf_raid.h"
    136 #include "rf_dag.h"
    137 #include "rf_dagffrd.h"
    138 #include "rf_dagffwr.h"
    139 #include "rf_dagdegrd.h"
    140 #include "rf_dagdegwr.h"
    141 #include "rf_dagutils.h"
    142 #include "rf_dagfuncs.h"
    143 #include "rf_threadid.h"
    144 #include "rf_etimer.h"
    145 #include "rf_pqdeg.h"
    146 #include "rf_general.h"
    147 #include "rf_map.h"
    148 #include "rf_pq.h"
    149 #include "rf_sys.h"
    150 
    151 RF_RedFuncs_t rf_pFuncs = { rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P" };
    152 RF_RedFuncs_t rf_pRecoveryFuncs = { rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func" };
    153 
    154 int rf_RegularONPFunc(node)
    155   RF_DagNode_t  *node;
    156 {
    157   return(rf_RegularXorFunc(node));
    158 }
    159 
    160 /*
    161    same as simpleONQ func, but the coefficient is always 1
    162 */
    163 
    164 int rf_SimpleONPFunc(node)
    165   RF_DagNode_t  *node;
    166 {
    167   return(rf_SimpleXorFunc(node));
    168 }
    169 
    170 int rf_RecoveryPFunc(node)
    171 RF_DagNode_t *node;
    172 {
    173   return(rf_RecoveryXorFunc(node));
    174 }
    175 
    176 int rf_RegularPFunc(node)
    177   RF_DagNode_t  *node;
    178 {
    179   return(rf_RegularXorFunc(node));
    180 }
    181 
    182 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    183 
    184 static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
    185 	unsigned char coeff);
    186 static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
    187 	unsigned length, unsigned coeff);
    188 
    189 RF_RedFuncs_t rf_qFuncs = { rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q" };
    190 RF_RedFuncs_t rf_qRecoveryFuncs = { rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func" };
    191 RF_RedFuncs_t rf_pqRecoveryFuncs = { rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func" };
    192 
    193 void rf_PQDagSelect(
    194   RF_Raid_t             *raidPtr,
    195   RF_IoType_t            type,
    196   RF_AccessStripeMap_t  *asmap,
    197   RF_VoidFuncPtr        *createFunc)
    198 {
    199   RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    200   unsigned ndfail = asmap->numDataFailed;
    201   unsigned npfail = asmap->numParityFailed;
    202   unsigned ntfail = npfail + ndfail;
    203 
    204   RF_ASSERT(RF_IO_IS_R_OR_W(type));
    205   if (ntfail > 2)
    206     {
    207       RF_ERRORMSG("more than two disks failed in a single group!  Aborting I/O operation.\n");
    208       /* *infoFunc = */ *createFunc = NULL;
    209       return;
    210     }
    211 
    212   /* ok, we can do this I/O */
    213   if (type == RF_IO_TYPE_READ)
    214     {
    215       switch (ndfail)
    216 	{
    217 	case 0:
    218 	  /* fault free read */
    219 	  *createFunc = rf_CreateFaultFreeReadDAG;   /* same as raid 5 */
    220 	  break;
    221 	case 1:
    222 	  /* lost a single data unit */
    223 	  /* two cases:
    224 	        (1) parity is not lost.
    225 		    do a normal raid 5 reconstruct read.
    226 		(2) parity is lost.
    227 		    do a reconstruct read using "q".
    228           */
    229 	  if (ntfail == 2) /* also lost redundancy */
    230 	    {
    231 	      if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
    232 		*createFunc = rf_PQ_110_CreateReadDAG;
    233 	      else
    234 		*createFunc = rf_PQ_101_CreateReadDAG;
    235 	    }
    236 	  else
    237 	    {
    238 	      /* P and Q are ok. But is there a failure
    239 		 in some unaccessed data unit?
    240               */
    241 	      if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
    242 		*createFunc = rf_PQ_200_CreateReadDAG;
    243 	      else
    244 		  *createFunc = rf_PQ_100_CreateReadDAG;
    245 	    }
    246 	  break;
    247 	case 2:
    248 	  /* lost two data units */
    249 	  /* *infoFunc = PQOneTwo; */
    250 	  *createFunc = rf_PQ_200_CreateReadDAG;
    251 	  break;
    252 	}
    253       return;
    254     }
    255 
    256   /* a write */
    257   switch (ntfail)
    258     {
    259     case 0: /* fault free */
    260       if (rf_suppressLocksAndLargeWrites ||
    261 	  (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
    262 	   (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
    263 
    264 	*createFunc = rf_PQCreateSmallWriteDAG;
    265       }
    266       else {
    267 	*createFunc = rf_PQCreateLargeWriteDAG;
    268       }
    269       break;
    270 
    271     case 1: /* single disk fault */
    272       if (npfail==1)
    273 	{
    274 	  RF_ASSERT ((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) ||  (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
    275 	  if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)
    276 	    { /* q died, treat like normal mode raid5 write.*/
    277 	      if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
    278 		  || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
    279 		*createFunc = rf_PQ_001_CreateSmallWriteDAG;
    280 	      else
    281 		*createFunc = rf_PQ_001_CreateLargeWriteDAG;
    282 	    }
    283 	  else
    284 	    { /* parity died, small write only updating Q */
    285 	      if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
    286 		  || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
    287 		*createFunc = rf_PQ_010_CreateSmallWriteDAG;
    288 	      else
    289 		*createFunc = rf_PQ_010_CreateLargeWriteDAG;
    290 	    }
    291 	}
    292       else
    293 	{ /* data missing.
    294 	     Do a P reconstruct write if only a single data unit
    295 	     is lost in the stripe, otherwise a PQ reconstruct
    296 	     write. */
    297 	  if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
    298 	    *createFunc = rf_PQ_200_CreateWriteDAG;
    299 	  else
    300 	    *createFunc = rf_PQ_100_CreateWriteDAG;
    301 	}
    302       break;
    303 
    304     case 2: /* two disk faults */
    305       switch (npfail)
    306 	{
    307 	case 2: /* both p and q dead */
    308 	  *createFunc = rf_PQ_011_CreateWriteDAG;
    309 	  break;
    310 	case 1: /* either p or q and dead data */
    311 	  RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
    312 	  RF_ASSERT ((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) ||  (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
    313 	  if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
    314 	    *createFunc = rf_PQ_101_CreateWriteDAG;
    315 	  else
    316 	    *createFunc = rf_PQ_110_CreateWriteDAG;
    317 	  break;
    318 	case 0: /* double data loss */
    319 	  *createFunc = rf_PQ_200_CreateWriteDAG;
    320 	  break;
    321 	}
    322       break;
    323 
    324     default:  /* more than 2 disk faults */
    325       *createFunc = NULL;
    326       RF_PANIC();
    327     }
    328   return;
    329 }
    330 
    331 /*
    332    Used as a stop gap info function
    333 */
    334 static void PQOne(raidPtr, nSucc, nAnte, asmap)
    335   RF_Raid_t             *raidPtr;
    336   int                   *nSucc;
    337   int                   *nAnte;
    338   RF_AccessStripeMap_t  *asmap;
    339 {
    340   *nSucc = *nAnte = 1;
    341 }
    342 
    343 static void PQOneTwo(raidPtr, nSucc, nAnte, asmap)
    344   RF_Raid_t             *raidPtr;
    345   int                   *nSucc;
    346   int                   *nAnte;
    347   RF_AccessStripeMap_t  *asmap;
    348 {
    349   *nSucc = 1;
    350   *nAnte = 2;
    351 }
    352 
    353 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
    354 {
    355   rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
    356     rf_RegularPQFunc, RF_FALSE);
    357 }
    358 
    359 int rf_RegularONQFunc(node)
    360   RF_DagNode_t  *node;
    361 {
    362   int np = node->numParams;
    363   int d;
    364   RF_Raid_t *raidPtr = (RF_Raid_t *)node->params[np-1].p;
    365   int i;
    366   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    367   RF_Etimer_t timer;
    368   char *qbuf, *qpbuf;
    369   char *obuf, *nbuf;
    370   RF_PhysDiskAddr_t *old, *new;
    371   unsigned long coeff;
    372   unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    373 
    374   RF_ETIMER_START(timer);
    375 
    376   d = (np-3)/4;
    377   RF_ASSERT (4*d+3 == np);
    378   qbuf = (char *) node->params[2*d+1].p; /* q buffer*/
    379   for (i=0; i < d; i++)
    380     {
    381       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
    382       obuf = (char *) node->params[2*i+1].p;
    383       new  = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p;
    384       nbuf = (char *) node->params[2*(d+1+i)+1].p;
    385       RF_ASSERT (new->numSector == old->numSector);
    386       RF_ASSERT (new->raidAddress == old->raidAddress);
    387       /* the stripe unit within the stripe tells us the coefficient to use
    388 	 for the multiply. */
    389       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress);
    390       /* compute the data unit offset within the column, then add one */
    391       coeff = (coeff % raidPtr->Layout.numDataCol);
    392       qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU);
    393       QDelta(qpbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
    394     }
    395 
    396   RF_ETIMER_STOP(timer);
    397   RF_ETIMER_EVAL(timer);
    398   tracerec->q_us += RF_ETIMER_VAL_US(timer);
    399   rf_GenericWakeupFunc(node, 0);     /* call wake func explicitly since no I/O in this node */
    400   return(0);
    401 }
    402 
    403 /*
    404    See the SimpleXORFunc for the difference between a simple and regular func.
    405    These Q functions should be used for
    406 
    407          new q = Q(data,old data,old q)
    408 
    409    style updates and not for
    410 
    411          q = ( new data, new data, .... )
    412 
    413    computations.
    414 
    415    The simple q takes 2(2d+1)+1 params, where d is the number
    416    of stripes written. The order of params is
    417    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
    418    [2d] old q pda_0, old q buffer
    419    [2d_2] new data pda_0, new data buffer_0, ...                                    new data pda_d, new data buffer_d
    420    raidPtr
    421 */
    422 
    423 int rf_SimpleONQFunc(node)
    424   RF_DagNode_t  *node;
    425 {
    426   int np = node->numParams;
    427   int d;
    428   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
    429   int i;
    430   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    431   RF_Etimer_t timer;
    432   char *qbuf;
    433   char *obuf, *nbuf;
    434   RF_PhysDiskAddr_t *old, *new;
    435   unsigned long coeff;
    436 
    437   RF_ETIMER_START(timer);
    438 
    439   d = (np-3)/4;
    440   RF_ASSERT (4*d+3 == np);
    441   qbuf = (char *) node->params[2*d+1].p; /* q buffer*/
    442   for (i=0; i < d; i++)
    443     {
    444       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
    445       obuf = (char *) node->params[2*i+1].p;
    446       new  = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p;
    447       nbuf = (char *) node->params[2*(d+1+i)+1].p;
    448       RF_ASSERT (new->numSector == old->numSector);
    449       RF_ASSERT (new->raidAddress == old->raidAddress);
    450       /* the stripe unit within the stripe tells us the coefficient to use
    451 	 for the multiply. */
    452       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress);
    453       /* compute the data unit offset within the column, then add one */
    454       coeff = (coeff % raidPtr->Layout.numDataCol);
    455       QDelta(qbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
    456     }
    457 
    458   RF_ETIMER_STOP(timer);
    459   RF_ETIMER_EVAL(timer);
    460   tracerec->q_us += RF_ETIMER_VAL_US(timer);
    461   rf_GenericWakeupFunc(node, 0);     /* call wake func explicitly since no I/O in this node */
    462   return(0);
    463 }
    464 
    465 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
    466 {
    467   rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
    468 }
    469 
    470 static void RegularQSubr(node,qbuf)
    471   RF_DagNode_t  *node;
    472   char          *qbuf;
    473 {
    474   int np = node->numParams;
    475   int d;
    476   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
    477   unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    478   int i;
    479   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    480   RF_Etimer_t timer;
    481   char *obuf, *qpbuf;
    482   RF_PhysDiskAddr_t *old;
    483   unsigned long coeff;
    484 
    485   RF_ETIMER_START(timer);
    486 
    487   d = (np-1)/2;
    488   RF_ASSERT (2*d+1 == np);
    489   for (i=0; i < d; i++)
    490     {
    491       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
    492       obuf = (char *) node->params[2*i+1].p;
    493       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress);
    494       /* compute the data unit offset within the column, then add one */
    495       coeff = (coeff % raidPtr->Layout.numDataCol);
    496       /* the input buffers may not all be aligned with the start of the
    497 	 stripe. so shift by their sector offset within the stripe unit */
    498       qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU);
    499       rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
    500     }
    501 
    502   RF_ETIMER_STOP(timer);
    503   RF_ETIMER_EVAL(timer);
    504   tracerec->q_us += RF_ETIMER_VAL_US(timer);
    505 }
    506 
    507 /*
    508    used in degraded writes.
    509 */
    510 
    511 static void DegrQSubr(node)
    512   RF_DagNode_t  *node;
    513 {
    514   int np = node->numParams;
    515   int d;
    516   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
    517   unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    518   int i;
    519   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    520   RF_Etimer_t timer;
    521   char *qbuf = node->results[1];
    522   char *obuf, *qpbuf;
    523   RF_PhysDiskAddr_t *old;
    524   unsigned long coeff;
    525   unsigned fail_start;
    526   int j;
    527 
    528   old = (RF_PhysDiskAddr_t *)node->params[np-2].p;
    529   fail_start = old->startSector % secPerSU;
    530 
    531   RF_ETIMER_START(timer);
    532 
    533   d = (np-2)/2;
    534   RF_ASSERT (2*d+2 == np);
    535   for (i=0; i < d; i++)
    536     {
    537       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
    538       obuf = (char *) node->params[2*i+1].p;
    539       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress);
    540       /* compute the data unit offset within the column, then add one */
    541       coeff = (coeff % raidPtr->Layout.numDataCol);
    542       /* the input buffers may not all be aligned with the start of the
    543 	 stripe. so shift by their sector offset within the stripe unit */
    544       j = old->startSector % secPerSU;
    545       RF_ASSERT(j >= fail_start);
    546       qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,j - fail_start);
    547       rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
    548     }
    549 
    550   RF_ETIMER_STOP(timer);
    551   RF_ETIMER_EVAL(timer);
    552   tracerec->q_us += RF_ETIMER_VAL_US(timer);
    553 }
    554 
    555 /*
    556    Called by large write code to compute the new parity and the new q.
    557 
    558    structure of the params:
    559 
    560    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
    561    raidPtr
    562 
    563    for a total of 2d+1 arguments.
    564    The result buffers results[0], results[1] are the buffers for the p and q,
    565    respectively.
    566 
    567    We compute Q first, then compute P. The P calculation may try to reuse
    568    one of the input buffers for its output, so if we computed P first, we would
    569    corrupt the input for the q calculation.
    570 */
    571 
    572 int rf_RegularPQFunc(node)
    573   RF_DagNode_t  *node;
    574 {
    575   RegularQSubr(node,node->results[1]);
    576   return(rf_RegularXorFunc(node)); /* does the wakeup */
    577 }
    578 
    579 int rf_RegularQFunc(node)
    580   RF_DagNode_t  *node;
    581 {
    582   /* Almost ... adjust Qsubr args */
    583   RegularQSubr(node, node->results[0]);
    584   rf_GenericWakeupFunc(node, 0);     /* call wake func explicitly since no I/O in this node */
    585   return(0);
    586 }
    587 
    588 /*
    589    Called by singly degraded write code to compute the new parity and the new q.
    590 
    591    structure of the params:
    592 
    593    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
    594    failedPDA raidPtr
    595 
    596    for a total of 2d+2 arguments.
    597    The result buffers results[0], results[1] are the buffers for the parity and q,
    598    respectively.
    599 
    600    We compute Q first, then compute parity. The parity calculation may try to reuse
    601    one of the input buffers for its output, so if we computed parity first, we would
    602    corrupt the input for the q calculation.
    603 
    604    We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
    605 */
    606 
    607 void rf_Degraded_100_PQFunc(node)
    608   RF_DagNode_t  *node;
    609 {
    610   int np = node->numParams;
    611 
    612   RF_ASSERT (np >= 2);
    613   DegrQSubr(node);
    614   rf_RecoveryXorFunc(node);
    615 }
    616 
    617 
    618 /*
    619    The two below are used when reading a stripe with a single lost data unit.
    620    The parameters are
    621 
    622    pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
    623 
    624    and results[0] contains the data buffer. Which is originally zero-filled.
    625 
    626 */
    627 
    628 /* this Q func is used by the degraded-mode dag functions to recover lost data.
    629  * the second-to-last parameter is the PDA for the failed portion of the access.
    630  * the code here looks at this PDA and assumes that the xor target buffer is
    631  * equal in size to the number of sectors in the failed PDA.  It then uses
    632  * the other PDAs in the parameter list to determine where within the target
    633  * buffer the corresponding data should be xored.
    634  *
    635  * Recall the basic equation is
    636  *
    637  *     Q = ( data_1 + 2 * data_2 ... + k * data_k  ) mod 256
    638  *
    639  * so to recover data_j we need
    640  *
    641  *    J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
    642  *
    643  * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
    644  * copying Q into it. Then we need to do a table lookup to convert to solve
    645  *   data_j /= J
    646  *
    647  *
    648  */
    649 int rf_RecoveryQFunc(node)
    650   RF_DagNode_t  *node;
    651 {
    652   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
    653   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
    654   RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
    655   int i;
    656   RF_PhysDiskAddr_t *pda;
    657   RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector);
    658   char *srcbuf, *destbuf;
    659   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    660   RF_Etimer_t timer;
    661   unsigned long coeff;
    662 
    663   RF_ETIMER_START(timer);
    664   /* start by copying Q into the buffer */
    665   bcopy(node->params[node->numParams-3].p,node->results[0],
    666     rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    667   for (i=0; i<node->numParams-4; i+=2)
    668     {
    669       RF_ASSERT (node->params[i+1].p != node->results[0]);
    670       pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    671       srcbuf = (char *) node->params[i+1].p;
    672       suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    673       destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset);
    674       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),pda->raidAddress);
    675       /* compute the data unit offset within the column */
    676       coeff = (coeff % raidPtr->Layout.numDataCol);
    677       rf_IncQ((unsigned long *)destbuf, (unsigned long *)srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
    678   }
    679   /* Do the nasty inversion now */
    680   coeff =  (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),failedPDA->startSector) % raidPtr->Layout.numDataCol);
    681   rf_InvertQ(node->results[0],node->results[0],rf_RaidAddressToByte(raidPtr,pda->numSector),coeff);
    682   RF_ETIMER_STOP(timer);
    683   RF_ETIMER_EVAL(timer);
    684   tracerec->q_us += RF_ETIMER_VAL_US(timer);
    685   rf_GenericWakeupFunc(node, 0);
    686   return(0);
    687 }
    688 
    689 int rf_RecoveryPQFunc(node)
    690   RF_DagNode_t  *node;
    691 {
    692   RF_PANIC();
    693   return(1);
    694 }
    695 
    696 /*
    697    Degraded write Q subroutine.
    698    Used when P is dead.
    699    Large-write style Q computation.
    700    Parameters
    701 
    702    (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
    703 
    704    We ignore failedPDA.
    705 
    706    This is a "simple style" recovery func.
    707 */
    708 
    709 void rf_PQ_DegradedWriteQFunc(node)
    710   RF_DagNode_t  *node;
    711 {
    712   int np = node->numParams;
    713   int d;
    714   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
    715   unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    716   int i;
    717   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    718   RF_Etimer_t timer;
    719   char *qbuf = node->results[0];
    720   char *obuf, *qpbuf;
    721   RF_PhysDiskAddr_t *old;
    722   unsigned long coeff;
    723   int fail_start,j;
    724 
    725   old = (RF_PhysDiskAddr_t *) node->params[np-2].p;
    726   fail_start = old->startSector % secPerSU;
    727 
    728   RF_ETIMER_START(timer);
    729 
    730   d = (np-2)/2;
    731   RF_ASSERT (2*d+2 == np);
    732 
    733   for (i=0; i < d; i++)
    734     {
    735       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
    736       obuf = (char *) node->params[2*i+1].p;
    737       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress);
    738       /* compute the data unit offset within the column, then add one */
    739       coeff = (coeff % raidPtr->Layout.numDataCol);
    740       j = old->startSector % secPerSU;
    741       RF_ASSERT(j >= fail_start);
    742       qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,j - fail_start);
    743       rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
    744     }
    745 
    746   RF_ETIMER_STOP(timer);
    747   RF_ETIMER_EVAL(timer);
    748   tracerec->q_us += RF_ETIMER_VAL_US(timer);
    749   rf_GenericWakeupFunc(node, 0);
    750 }
    751 
    752 
    753 
    754 
    755 /* Q computations */
    756 
    757 /*
    758    coeff - colummn;
    759 
    760    compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
    761 
    762    on 5-bit basis;
    763    length in bytes;
    764 */
    765 
    766 void rf_IncQ(dest,buf,length,coeff)
    767   unsigned long   *dest;
    768   unsigned long   *buf;
    769   unsigned         length;
    770   unsigned         coeff;
    771 {
    772   unsigned long a, d, new;
    773   unsigned long a1, a2;
    774   unsigned int *q = &(rf_qfor[28-coeff][0]);
    775   unsigned r = rf_rn[coeff+1];
    776 
    777 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
    778 #define INSERT(a,i) (a << (5L*i))
    779 
    780   length /= 8;
    781   /* 13 5 bit quants in a 64 bit word */
    782   while (length)
    783     {
    784       a = *buf++;
    785       d = *dest;
    786       a1 = EXTRACT(a,0) ^ r;
    787       a2 = EXTRACT(a,1) ^ r;
    788       new = INSERT(a2,1) | a1 ;
    789       a1 = EXTRACT(a,2) ^ r;
    790       a2 = EXTRACT(a,3) ^ r;
    791       a1 = q[a1];
    792       a2 = q[a2];
    793       new = new | INSERT(a1,2) | INSERT (a2,3);
    794       a1 = EXTRACT(a,4) ^ r;
    795       a2 = EXTRACT(a,5) ^ r;
    796       a1 = q[a1];
    797       a2 = q[a2];
    798       new = new | INSERT(a1,4) | INSERT (a2,5);
    799       a1 = EXTRACT(a,5) ^ r;
    800       a2 = EXTRACT(a,6) ^ r;
    801       a1 = q[a1];
    802       a2 = q[a2];
    803       new = new | INSERT(a1,5) | INSERT (a2,6);
    804 #if RF_LONGSHIFT > 2
    805       a1 = EXTRACT(a,7) ^ r;
    806       a2 = EXTRACT(a,8) ^ r;
    807       a1 = q[a1];
    808       a2 = q[a2];
    809       new = new | INSERT(a1,7) | INSERT (a2,8);
    810       a1 = EXTRACT(a,9) ^ r;
    811       a2 = EXTRACT(a,10) ^ r;
    812       a1 = q[a1];
    813       a2 = q[a2];
    814       new = new | INSERT(a1,9) | INSERT (a2,10);
    815       a1 = EXTRACT(a,11) ^ r;
    816       a2 = EXTRACT(a,12) ^ r;
    817       a1 = q[a1];
    818       a2 = q[a2];
    819       new = new | INSERT(a1,11) | INSERT (a2,12);
    820 #endif /* RF_LONGSHIFT > 2 */
    821       d ^= new;
    822       *dest++ = d;
    823       length--;
    824     }
    825 }
    826 
/*
   compute

   dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] ^ (old^new)]

   on a five bit basis.
   optimization: compute old ^ new on 64 bit basis.

   length in bytes.
*/
    837 
    838 static void QDelta(
    839   char           *dest,
    840   char           *obuf,
    841   char           *nbuf,
    842   unsigned        length,
    843   unsigned char   coeff)
    844 {
    845   unsigned long a, d, new;
    846   unsigned long a1, a2;
    847   unsigned int *q = &(rf_qfor[28-coeff][0]);
    848   unsigned r = rf_rn[coeff+1];
    849 
    850 #ifdef KERNEL
    851   /* PQ in kernel currently not supported because the encoding/decoding table is not present */
    852   bzero(dest, length);
    853 #else  /* KERNEL */
    854   /* this code probably doesn't work and should be rewritten  -wvcii */
    855   /* 13 5 bit quants in a 64 bit word */
    856   length /= 8;
    857   while (length)
    858     {
    859       a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
    860       a ^= *nbuf++;
    861       d = *dest;
    862       a1 = EXTRACT(a,0) ^ r;
    863       a2 = EXTRACT(a,1) ^ r;
    864       a1 = q[a1];
    865       a2 = q[a2];
    866       new = INSERT(a2,1) | a1 ;
    867       a1 = EXTRACT(a,2) ^ r;
    868       a2 = EXTRACT(a,3) ^ r;
    869       a1 = q[a1];
    870       a2 = q[a2];
    871       new = new | INSERT(a1,2) | INSERT (a2,3);
    872       a1 = EXTRACT(a,4) ^ r;
    873       a2 = EXTRACT(a,5) ^ r;
    874       a1 = q[a1];
    875       a2 = q[a2];
    876       new = new | INSERT(a1,4) | INSERT (a2,5);
    877       a1 = EXTRACT(a,5) ^ r;
    878       a2 = EXTRACT(a,6) ^ r;
    879       a1 = q[a1];
    880       a2 = q[a2];
    881       new = new | INSERT(a1,5) | INSERT (a2,6);
    882 #if RF_LONGSHIFT > 2
    883       a1 = EXTRACT(a,7) ^ r;
    884       a2 = EXTRACT(a,8) ^ r;
    885       a1 = q[a1];
    886       a2 = q[a2];
    887       new = new | INSERT(a1,7) | INSERT (a2,8);
    888       a1 = EXTRACT(a,9) ^ r;
    889       a2 = EXTRACT(a,10) ^ r;
    890       a1 = q[a1];
    891       a2 = q[a2];
    892       new = new | INSERT(a1,9) | INSERT (a2,10);
    893       a1 = EXTRACT(a,11) ^ r;
    894       a2 = EXTRACT(a,12) ^ r;
    895       a1 = q[a1];
    896       a2 = q[a2];
    897       new = new | INSERT(a1,11) | INSERT (a2,12);
    898 #endif /* RF_LONGSHIFT > 2 */
    899       d ^= new;
    900       *dest++ = d;
    901       length--;
    902     }
    903 #endif  /* KERNEL */
    904 }
    905 
    906 /*
    907    recover columns a and b from the given p and q into
    908    bufs abuf and bbuf. All bufs are word aligned.
    909    Length is in bytes.
    910 */
    911 
    912 
    913 /*
    914  * XXX
    915  *
    916  * Everything about this seems wrong.
    917  */
    918 void rf_PQ_recover(pbuf,qbuf,abuf,bbuf,length,coeff_a,coeff_b)
    919   unsigned long  *pbuf;
    920   unsigned long  *qbuf;
    921   unsigned long  *abuf;
    922   unsigned long  *bbuf;
    923   unsigned        length;
    924   unsigned        coeff_a;
    925   unsigned        coeff_b;
    926 {
    927   unsigned long p, q, a, a0, a1;
    928   int col = (29 * coeff_a) + coeff_b;
    929   unsigned char *q0 = & (rf_qinv[col][0]);
    930 
    931   length /= 8;
    932   while (length)
    933     {
    934       p  = *pbuf++;
    935       q  = *qbuf++;
    936       a0 = EXTRACT(p,0);
    937       a1 = EXTRACT(q,0);
    938       a  = q0[a0<<5 | a1];
    939 #define MF(i) \
    940       a0 = EXTRACT(p,i); \
    941       a1 = EXTRACT(q,i); \
    942       a  = a | INSERT(q0[a0<<5 | a1],i)
    943 
    944       MF(1);
    945       MF(2);
    946       MF(3);
    947       MF(4);
    948       MF(5);
    949       MF(6);
    950 #if 0
    951       MF(7);
    952       MF(8);
    953       MF(9);
    954       MF(10);
    955       MF(11);
    956       MF(12);
    957 #endif /* 0 */
    958       *abuf++ = a;
    959       *bbuf++ = a ^ p;
    960       length--;
    961     }
    962 }
    963 
/*
   Lost parity and a data column. Recover that data column.
   Assume col coeff is lost. Let q be the contents of Q after
   all surviving data columns have been q-xored out of it.
   Then we have the equation

   rf_qfor[28-coeff][a_i ^ r_{coeff+1}] = q

   but rf_qfor is cyclic with period 31.
   So rf_qfor[3+coeff][rf_qfor[28-coeff][a_i ^ r_{coeff+1}]] =
      rf_qfor[31][a_i ^ r_{coeff+1}] = a_i ^ r_{coeff+1} .

   so a_i = r_{coeff+1} ^ rf_qfor[3+coeff][q]

   The routine is passed the q buffer and the buffer
   the data is to be recovered into. They can be the same.
*/
    980 */
    981 
    982 
    983 
    984 static void rf_InvertQ(
    985   unsigned long  *qbuf,
    986   unsigned long  *abuf,
    987   unsigned        length,
    988   unsigned        coeff)
    989 {
    990   unsigned long a, new;
    991   unsigned long a1, a2;
    992   unsigned int *q = &(rf_qfor[3+coeff][0]);
    993   unsigned r = rf_rn[coeff+1];
    994 
    995   /* 13 5 bit quants in a 64 bit word */
    996   length /= 8;
    997   while (length)
    998     {
    999       a = *qbuf++;
   1000       a1 = EXTRACT(a,0);
   1001       a2 = EXTRACT(a,1);
   1002       a1 = r ^ q[a1];
   1003       a2 = r ^ q[a2];
   1004       new = INSERT(a2,1) | a1;
   1005 #define M(i,j) \
   1006       a1 = EXTRACT(a,i); \
   1007       a2 = EXTRACT(a,j); \
   1008       a1 = r ^ q[a1]; \
   1009       a2 = r ^ q[a2]; \
   1010       new = new | INSERT(a1,i) | INSERT(a2,j)
   1011 
   1012       M(2,3);
   1013       M(4,5);
   1014       M(5,6);
   1015 #if RF_LONGSHIFT > 2
   1016       M(7,8);
   1017       M(9,10);
   1018       M(11,12);
   1019 #endif /* RF_LONGSHIFT > 2 */
   1020       *abuf++ = new;
   1021       length--;
   1022     }
   1023 }
   1024 
   1025 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */
   1026