Home | History | Annotate | Line # | Download | only in raidframe
rf_pq.c revision 1.4
      1 /*	$NetBSD: rf_pq.c,v 1.4 1999/08/13 03:41:57 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Daniel Stodolsky
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID level 6 (P + Q) disk array architecture.
     31  */
     32 
     33 #include "rf_archs.h"
     34 #include "rf_types.h"
     35 #include "rf_raid.h"
     36 #include "rf_dag.h"
     37 #include "rf_dagffrd.h"
     38 #include "rf_dagffwr.h"
     39 #include "rf_dagdegrd.h"
     40 #include "rf_dagdegwr.h"
     41 #include "rf_dagutils.h"
     42 #include "rf_dagfuncs.h"
     43 #include "rf_threadid.h"
     44 #include "rf_etimer.h"
     45 #include "rf_pqdeg.h"
     46 #include "rf_general.h"
     47 #include "rf_map.h"
     48 #include "rf_pq.h"
     49 
     50 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
     51 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
     52 
     53 int
     54 rf_RegularONPFunc(node)
     55 	RF_DagNode_t *node;
     56 {
     57 	return (rf_RegularXorFunc(node));
     58 }
     59 /*
     60    same as simpleONQ func, but the coefficient is always 1
     61 */
     62 
     63 int
     64 rf_SimpleONPFunc(node)
     65 	RF_DagNode_t *node;
     66 {
     67 	return (rf_SimpleXorFunc(node));
     68 }
     69 
     70 int
     71 rf_RecoveryPFunc(node)
     72 	RF_DagNode_t *node;
     73 {
     74 	return (rf_RecoveryXorFunc(node));
     75 }
     76 
     77 int
     78 rf_RegularPFunc(node)
     79 	RF_DagNode_t *node;
     80 {
     81 	return (rf_RegularXorFunc(node));
     82 }
     83 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
     84 
     85 static void
     86 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
     87     unsigned char coeff);
     88 static void
     89 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
     90     unsigned length, unsigned coeff);
     91 
     92 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
     93 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
     94 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
     95 
     96 void
     97 rf_PQDagSelect(
     98     RF_Raid_t * raidPtr,
     99     RF_IoType_t type,
    100     RF_AccessStripeMap_t * asmap,
    101     RF_VoidFuncPtr * createFunc)
    102 {
    103 	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    104 	unsigned ndfail = asmap->numDataFailed;
    105 	unsigned npfail = asmap->numParityFailed;
    106 	unsigned ntfail = npfail + ndfail;
    107 
    108 	RF_ASSERT(RF_IO_IS_R_OR_W(type));
    109 	if (ntfail > 2) {
    110 		RF_ERRORMSG("more than two disks failed in a single group!  Aborting I/O operation.\n");
    111 		 /* *infoFunc = */ *createFunc = NULL;
    112 		return;
    113 	}
    114 	/* ok, we can do this I/O */
    115 	if (type == RF_IO_TYPE_READ) {
    116 		switch (ndfail) {
    117 		case 0:
    118 			/* fault free read */
    119 			*createFunc = rf_CreateFaultFreeReadDAG;	/* same as raid 5 */
    120 			break;
    121 		case 1:
    122 			/* lost a single data unit */
    123 			/* two cases: (1) parity is not lost. do a normal raid
    124 			 * 5 reconstruct read. (2) parity is lost. do a
    125 			 * reconstruct read using "q". */
    126 			if (ntfail == 2) {	/* also lost redundancy */
    127 				if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
    128 					*createFunc = rf_PQ_110_CreateReadDAG;
    129 				else
    130 					*createFunc = rf_PQ_101_CreateReadDAG;
    131 			} else {
    132 				/* P and Q are ok. But is there a failure in
    133 				 * some unaccessed data unit? */
    134 				if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
    135 					*createFunc = rf_PQ_200_CreateReadDAG;
    136 				else
    137 					*createFunc = rf_PQ_100_CreateReadDAG;
    138 			}
    139 			break;
    140 		case 2:
    141 			/* lost two data units */
    142 			/* *infoFunc = PQOneTwo; */
    143 			*createFunc = rf_PQ_200_CreateReadDAG;
    144 			break;
    145 		}
    146 		return;
    147 	}
    148 	/* a write */
    149 	switch (ntfail) {
    150 	case 0:		/* fault free */
    151 		if (rf_suppressLocksAndLargeWrites ||
    152 		    (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
    153 			(asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
    154 
    155 			*createFunc = rf_PQCreateSmallWriteDAG;
    156 		} else {
    157 			*createFunc = rf_PQCreateLargeWriteDAG;
    158 		}
    159 		break;
    160 
    161 	case 1:		/* single disk fault */
    162 		if (npfail == 1) {
    163 			RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
    164 			if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {	/* q died, treat like
    165 										 * normal mode raid5
    166 										 * write. */
    167 				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
    168 				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
    169 					*createFunc = rf_PQ_001_CreateSmallWriteDAG;
    170 				else
    171 					*createFunc = rf_PQ_001_CreateLargeWriteDAG;
    172 			} else {/* parity died, small write only updating Q */
    173 				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
    174 				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
    175 					*createFunc = rf_PQ_010_CreateSmallWriteDAG;
    176 				else
    177 					*createFunc = rf_PQ_010_CreateLargeWriteDAG;
    178 			}
    179 		} else {	/* data missing. Do a P reconstruct write if
    180 				 * only a single data unit is lost in the
    181 				 * stripe, otherwise a PQ reconstruct write. */
    182 			if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
    183 				*createFunc = rf_PQ_200_CreateWriteDAG;
    184 			else
    185 				*createFunc = rf_PQ_100_CreateWriteDAG;
    186 		}
    187 		break;
    188 
    189 	case 2:		/* two disk faults */
    190 		switch (npfail) {
    191 		case 2:	/* both p and q dead */
    192 			*createFunc = rf_PQ_011_CreateWriteDAG;
    193 			break;
    194 		case 1:	/* either p or q and dead data */
    195 			RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
    196 			RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
    197 			if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
    198 				*createFunc = rf_PQ_101_CreateWriteDAG;
    199 			else
    200 				*createFunc = rf_PQ_110_CreateWriteDAG;
    201 			break;
    202 		case 0:	/* double data loss */
    203 			*createFunc = rf_PQ_200_CreateWriteDAG;
    204 			break;
    205 		}
    206 		break;
    207 
    208 	default:		/* more than 2 disk faults */
    209 		*createFunc = NULL;
    210 		RF_PANIC();
    211 	}
    212 	return;
    213 }
    214 /*
    215    Used as a stop gap info function
    216 */
    217 static void
    218 PQOne(raidPtr, nSucc, nAnte, asmap)
    219 	RF_Raid_t *raidPtr;
    220 	int    *nSucc;
    221 	int    *nAnte;
    222 	RF_AccessStripeMap_t *asmap;
    223 {
    224 	*nSucc = *nAnte = 1;
    225 }
    226 
    227 static void
    228 PQOneTwo(raidPtr, nSucc, nAnte, asmap)
    229 	RF_Raid_t *raidPtr;
    230 	int    *nSucc;
    231 	int    *nAnte;
    232 	RF_AccessStripeMap_t *asmap;
    233 {
    234 	*nSucc = 1;
    235 	*nAnte = 2;
    236 }
    237 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
    238 {
    239 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
    240 	    rf_RegularPQFunc, RF_FALSE);
    241 }
    242 
    243 int
    244 rf_RegularONQFunc(node)
    245 	RF_DagNode_t *node;
    246 {
    247 	int     np = node->numParams;
    248 	int     d;
    249 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    250 	int     i;
    251 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    252 	RF_Etimer_t timer;
    253 	char   *qbuf, *qpbuf;
    254 	char   *obuf, *nbuf;
    255 	RF_PhysDiskAddr_t *old, *new;
    256 	unsigned long coeff;
    257 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    258 
    259 	RF_ETIMER_START(timer);
    260 
    261 	d = (np - 3) / 4;
    262 	RF_ASSERT(4 * d + 3 == np);
    263 	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
    264 	for (i = 0; i < d; i++) {
    265 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    266 		obuf = (char *) node->params[2 * i + 1].p;
    267 		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
    268 		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
    269 		RF_ASSERT(new->numSector == old->numSector);
    270 		RF_ASSERT(new->raidAddress == old->raidAddress);
    271 		/* the stripe unit within the stripe tells us the coefficient
    272 		 * to use for the multiply. */
    273 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
    274 		/* compute the data unit offset within the column, then add
    275 		 * one */
    276 		coeff = (coeff % raidPtr->Layout.numDataCol);
    277 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
    278 		QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    279 	}
    280 
    281 	RF_ETIMER_STOP(timer);
    282 	RF_ETIMER_EVAL(timer);
    283 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    284 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    285 					 * I/O in this node */
    286 	return (0);
    287 }
    288 /*
    289    See the SimpleXORFunc for the difference between a simple and regular func.
    290    These Q functions should be used for
    291 
    292          new q = Q(data,old data,old q)
    293 
    294    style updates and not for
    295 
    296          q = ( new data, new data, .... )
    297 
    298    computations.
    299 
    300    The simple q takes 2(2d+1)+1 params, where d is the number
    301    of stripes written. The order of params is
    302    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
    303    [2d] old q pda_0, old q buffer
    304    [2d_2] new data pda_0, new data buffer_0, ...                                    new data pda_d, new data buffer_d
    305    raidPtr
    306 */
    307 
    308 int
    309 rf_SimpleONQFunc(node)
    310 	RF_DagNode_t *node;
    311 {
    312 	int     np = node->numParams;
    313 	int     d;
    314 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    315 	int     i;
    316 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    317 	RF_Etimer_t timer;
    318 	char   *qbuf;
    319 	char   *obuf, *nbuf;
    320 	RF_PhysDiskAddr_t *old, *new;
    321 	unsigned long coeff;
    322 
    323 	RF_ETIMER_START(timer);
    324 
    325 	d = (np - 3) / 4;
    326 	RF_ASSERT(4 * d + 3 == np);
    327 	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
    328 	for (i = 0; i < d; i++) {
    329 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    330 		obuf = (char *) node->params[2 * i + 1].p;
    331 		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
    332 		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
    333 		RF_ASSERT(new->numSector == old->numSector);
    334 		RF_ASSERT(new->raidAddress == old->raidAddress);
    335 		/* the stripe unit within the stripe tells us the coefficient
    336 		 * to use for the multiply. */
    337 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
    338 		/* compute the data unit offset within the column, then add
    339 		 * one */
    340 		coeff = (coeff % raidPtr->Layout.numDataCol);
    341 		QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    342 	}
    343 
    344 	RF_ETIMER_STOP(timer);
    345 	RF_ETIMER_EVAL(timer);
    346 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    347 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    348 					 * I/O in this node */
    349 	return (0);
    350 }
    351 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
    352 {
    353 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
    354 }
    355 
    356 static void
    357 RegularQSubr(node, qbuf)
    358 	RF_DagNode_t *node;
    359 	char   *qbuf;
    360 {
    361 	int     np = node->numParams;
    362 	int     d;
    363 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    364 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    365 	int     i;
    366 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    367 	RF_Etimer_t timer;
    368 	char   *obuf, *qpbuf;
    369 	RF_PhysDiskAddr_t *old;
    370 	unsigned long coeff;
    371 
    372 	RF_ETIMER_START(timer);
    373 
    374 	d = (np - 1) / 2;
    375 	RF_ASSERT(2 * d + 1 == np);
    376 	for (i = 0; i < d; i++) {
    377 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    378 		obuf = (char *) node->params[2 * i + 1].p;
    379 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    380 		/* compute the data unit offset within the column, then add
    381 		 * one */
    382 		coeff = (coeff % raidPtr->Layout.numDataCol);
    383 		/* the input buffers may not all be aligned with the start of
    384 		 * the stripe. so shift by their sector offset within the
    385 		 * stripe unit */
    386 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
    387 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    388 	}
    389 
    390 	RF_ETIMER_STOP(timer);
    391 	RF_ETIMER_EVAL(timer);
    392 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    393 }
    394 /*
    395    used in degraded writes.
    396 */
    397 
    398 static void
    399 DegrQSubr(node)
    400 	RF_DagNode_t *node;
    401 {
    402 	int     np = node->numParams;
    403 	int     d;
    404 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    405 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    406 	int     i;
    407 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    408 	RF_Etimer_t timer;
    409 	char   *qbuf = node->results[1];
    410 	char   *obuf, *qpbuf;
    411 	RF_PhysDiskAddr_t *old;
    412 	unsigned long coeff;
    413 	unsigned fail_start;
    414 	int     j;
    415 
    416 	old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
    417 	fail_start = old->startSector % secPerSU;
    418 
    419 	RF_ETIMER_START(timer);
    420 
    421 	d = (np - 2) / 2;
    422 	RF_ASSERT(2 * d + 2 == np);
    423 	for (i = 0; i < d; i++) {
    424 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    425 		obuf = (char *) node->params[2 * i + 1].p;
    426 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    427 		/* compute the data unit offset within the column, then add
    428 		 * one */
    429 		coeff = (coeff % raidPtr->Layout.numDataCol);
    430 		/* the input buffers may not all be aligned with the start of
    431 		 * the stripe. so shift by their sector offset within the
    432 		 * stripe unit */
    433 		j = old->startSector % secPerSU;
    434 		RF_ASSERT(j >= fail_start);
    435 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
    436 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    437 	}
    438 
    439 	RF_ETIMER_STOP(timer);
    440 	RF_ETIMER_EVAL(timer);
    441 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    442 }
    443 /*
    444    Called by large write code to compute the new parity and the new q.
    445 
    446    structure of the params:
    447 
    448    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
    449    raidPtr
    450 
    451    for a total of 2d+1 arguments.
    452    The result buffers results[0], results[1] are the buffers for the p and q,
    453    respectively.
    454 
    455    We compute Q first, then compute P. The P calculation may try to reuse
    456    one of the input buffers for its output, so if we computed P first, we would
    457    corrupt the input for the q calculation.
    458 */
    459 
    460 int
    461 rf_RegularPQFunc(node)
    462 	RF_DagNode_t *node;
    463 {
    464 	RegularQSubr(node, node->results[1]);
    465 	return (rf_RegularXorFunc(node));	/* does the wakeup */
    466 }
    467 
    468 int
    469 rf_RegularQFunc(node)
    470 	RF_DagNode_t *node;
    471 {
    472 	/* Almost ... adjust Qsubr args */
    473 	RegularQSubr(node, node->results[0]);
    474 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    475 					 * I/O in this node */
    476 	return (0);
    477 }
    478 /*
    479    Called by singly degraded write code to compute the new parity and the new q.
    480 
    481    structure of the params:
    482 
    483    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
    484    failedPDA raidPtr
    485 
    486    for a total of 2d+2 arguments.
    487    The result buffers results[0], results[1] are the buffers for the parity and q,
    488    respectively.
    489 
    490    We compute Q first, then compute parity. The parity calculation may try to reuse
    491    one of the input buffers for its output, so if we computed parity first, we would
    492    corrupt the input for the q calculation.
    493 
    494    We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
    495 */
    496 
    497 void
    498 rf_Degraded_100_PQFunc(node)
    499 	RF_DagNode_t *node;
    500 {
    501 	int     np = node->numParams;
    502 
    503 	RF_ASSERT(np >= 2);
    504 	DegrQSubr(node);
    505 	rf_RecoveryXorFunc(node);
    506 }
    507 
    508 
    509 /*
    510    The two below are used when reading a stripe with a single lost data unit.
    511    The parameters are
    512 
    513    pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
    514 
    515    and results[0] contains the data buffer. Which is originally zero-filled.
    516 
    517 */
    518 
    519 /* this Q func is used by the degraded-mode dag functions to recover lost data.
    520  * the second-to-last parameter is the PDA for the failed portion of the access.
    521  * the code here looks at this PDA and assumes that the xor target buffer is
    522  * equal in size to the number of sectors in the failed PDA.  It then uses
    523  * the other PDAs in the parameter list to determine where within the target
    524  * buffer the corresponding data should be xored.
    525  *
    526  * Recall the basic equation is
    527  *
    528  *     Q = ( data_1 + 2 * data_2 ... + k * data_k  ) mod 256
    529  *
    530  * so to recover data_j we need
    531  *
    532  *    J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
    533  *
    534  * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
    535  * copying Q into it. Then we need to do a table lookup to convert to solve
    536  *   data_j /= J
    537  *
    538  *
    539  */
    540 int
    541 rf_RecoveryQFunc(node)
    542 	RF_DagNode_t *node;
    543 {
    544 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    545 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    546 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    547 	int     i;
    548 	RF_PhysDiskAddr_t *pda;
    549 	RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    550 	char   *srcbuf, *destbuf;
    551 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    552 	RF_Etimer_t timer;
    553 	unsigned long coeff;
    554 
    555 	RF_ETIMER_START(timer);
    556 	/* start by copying Q into the buffer */
    557 	bcopy(node->params[node->numParams - 3].p, node->results[0],
    558 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    559 	for (i = 0; i < node->numParams - 4; i += 2) {
    560 		RF_ASSERT(node->params[i + 1].p != node->results[0]);
    561 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    562 		srcbuf = (char *) node->params[i + 1].p;
    563 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    564 		destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    565 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
    566 		/* compute the data unit offset within the column */
    567 		coeff = (coeff % raidPtr->Layout.numDataCol);
    568 		rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
    569 	}
    570 	/* Do the nasty inversion now */
    571 	coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
    572 	rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
    573 	RF_ETIMER_STOP(timer);
    574 	RF_ETIMER_EVAL(timer);
    575 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    576 	rf_GenericWakeupFunc(node, 0);
    577 	return (0);
    578 }
    579 
    580 int
    581 rf_RecoveryPQFunc(node)
    582 	RF_DagNode_t *node;
    583 {
    584 	RF_PANIC();
    585 	return (1);
    586 }
    587 /*
    588    Degraded write Q subroutine.
    589    Used when P is dead.
    590    Large-write style Q computation.
    591    Parameters
    592 
    593    (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
    594 
    595    We ignore failedPDA.
    596 
    597    This is a "simple style" recovery func.
    598 */
    599 
    600 void
    601 rf_PQ_DegradedWriteQFunc(node)
    602 	RF_DagNode_t *node;
    603 {
    604 	int     np = node->numParams;
    605 	int     d;
    606 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    607 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    608 	int     i;
    609 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    610 	RF_Etimer_t timer;
    611 	char   *qbuf = node->results[0];
    612 	char   *obuf, *qpbuf;
    613 	RF_PhysDiskAddr_t *old;
    614 	unsigned long coeff;
    615 	int     fail_start, j;
    616 
    617 	old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
    618 	fail_start = old->startSector % secPerSU;
    619 
    620 	RF_ETIMER_START(timer);
    621 
    622 	d = (np - 2) / 2;
    623 	RF_ASSERT(2 * d + 2 == np);
    624 
    625 	for (i = 0; i < d; i++) {
    626 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    627 		obuf = (char *) node->params[2 * i + 1].p;
    628 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    629 		/* compute the data unit offset within the column, then add
    630 		 * one */
    631 		coeff = (coeff % raidPtr->Layout.numDataCol);
    632 		j = old->startSector % secPerSU;
    633 		RF_ASSERT(j >= fail_start);
    634 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
    635 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    636 	}
    637 
    638 	RF_ETIMER_STOP(timer);
    639 	RF_ETIMER_EVAL(timer);
    640 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    641 	rf_GenericWakeupFunc(node, 0);
    642 }
    643 
    644 
    645 
    646 
    647 /* Q computations */
    648 
    649 /*
    650    coeff - colummn;
    651 
    652    compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
    653 
    654    on 5-bit basis;
    655    length in bytes;
    656 */
    657 
    658 void
    659 rf_IncQ(dest, buf, length, coeff)
    660 	unsigned long *dest;
    661 	unsigned long *buf;
    662 	unsigned length;
    663 	unsigned coeff;
    664 {
    665 	unsigned long a, d, new;
    666 	unsigned long a1, a2;
    667 	unsigned int *q = &(rf_qfor[28 - coeff][0]);
    668 	unsigned r = rf_rn[coeff + 1];
    669 
    670 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
    671 #define INSERT(a,i) (a << (5L*i))
    672 
    673 	length /= 8;
    674 	/* 13 5 bit quants in a 64 bit word */
    675 	while (length) {
    676 		a = *buf++;
    677 		d = *dest;
    678 		a1 = EXTRACT(a, 0) ^ r;
    679 		a2 = EXTRACT(a, 1) ^ r;
    680 		new = INSERT(a2, 1) | a1;
    681 		a1 = EXTRACT(a, 2) ^ r;
    682 		a2 = EXTRACT(a, 3) ^ r;
    683 		a1 = q[a1];
    684 		a2 = q[a2];
    685 		new = new | INSERT(a1, 2) | INSERT(a2, 3);
    686 		a1 = EXTRACT(a, 4) ^ r;
    687 		a2 = EXTRACT(a, 5) ^ r;
    688 		a1 = q[a1];
    689 		a2 = q[a2];
    690 		new = new | INSERT(a1, 4) | INSERT(a2, 5);
    691 		a1 = EXTRACT(a, 5) ^ r;
    692 		a2 = EXTRACT(a, 6) ^ r;
    693 		a1 = q[a1];
    694 		a2 = q[a2];
    695 		new = new | INSERT(a1, 5) | INSERT(a2, 6);
    696 #if RF_LONGSHIFT > 2
    697 		a1 = EXTRACT(a, 7) ^ r;
    698 		a2 = EXTRACT(a, 8) ^ r;
    699 		a1 = q[a1];
    700 		a2 = q[a2];
    701 		new = new | INSERT(a1, 7) | INSERT(a2, 8);
    702 		a1 = EXTRACT(a, 9) ^ r;
    703 		a2 = EXTRACT(a, 10) ^ r;
    704 		a1 = q[a1];
    705 		a2 = q[a2];
    706 		new = new | INSERT(a1, 9) | INSERT(a2, 10);
    707 		a1 = EXTRACT(a, 11) ^ r;
    708 		a2 = EXTRACT(a, 12) ^ r;
    709 		a1 = q[a1];
    710 		a2 = q[a2];
    711 		new = new | INSERT(a1, 11) | INSERT(a2, 12);
    712 #endif				/* RF_LONGSHIFT > 2 */
    713 		d ^= new;
    714 		*dest++ = d;
    715 		length--;
    716 	}
    717 }
    718 /*
    719    compute
    720 
    721    dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
    722 
    723    on a five bit basis.
    724    optimization: compute old ^ new on 64 bit basis.
    725 
    726    length in bytes.
    727 */
    728 
    729 static void
    730 QDelta(
    731     char *dest,
    732     char *obuf,
    733     char *nbuf,
    734     unsigned length,
    735     unsigned char coeff)
    736 {
    737 	unsigned long a, d, new;
    738 	unsigned long a1, a2;
    739 	unsigned int *q = &(rf_qfor[28 - coeff][0]);
    740 	unsigned r = rf_rn[coeff + 1];
    741 
    742 #ifdef _KERNEL
    743 	/* PQ in kernel currently not supported because the encoding/decoding
    744 	 * table is not present */
    745 	bzero(dest, length);
    746 #else				/* KERNEL */
    747 	/* this code probably doesn't work and should be rewritten  -wvcii */
    748 	/* 13 5 bit quants in a 64 bit word */
    749 	length /= 8;
    750 	while (length) {
    751 		a = *obuf++;	/* XXX need to reorg to avoid cache conflicts */
    752 		a ^= *nbuf++;
    753 		d = *dest;
    754 		a1 = EXTRACT(a, 0) ^ r;
    755 		a2 = EXTRACT(a, 1) ^ r;
    756 		a1 = q[a1];
    757 		a2 = q[a2];
    758 		new = INSERT(a2, 1) | a1;
    759 		a1 = EXTRACT(a, 2) ^ r;
    760 		a2 = EXTRACT(a, 3) ^ r;
    761 		a1 = q[a1];
    762 		a2 = q[a2];
    763 		new = new | INSERT(a1, 2) | INSERT(a2, 3);
    764 		a1 = EXTRACT(a, 4) ^ r;
    765 		a2 = EXTRACT(a, 5) ^ r;
    766 		a1 = q[a1];
    767 		a2 = q[a2];
    768 		new = new | INSERT(a1, 4) | INSERT(a2, 5);
    769 		a1 = EXTRACT(a, 5) ^ r;
    770 		a2 = EXTRACT(a, 6) ^ r;
    771 		a1 = q[a1];
    772 		a2 = q[a2];
    773 		new = new | INSERT(a1, 5) | INSERT(a2, 6);
    774 #if RF_LONGSHIFT > 2
    775 		a1 = EXTRACT(a, 7) ^ r;
    776 		a2 = EXTRACT(a, 8) ^ r;
    777 		a1 = q[a1];
    778 		a2 = q[a2];
    779 		new = new | INSERT(a1, 7) | INSERT(a2, 8);
    780 		a1 = EXTRACT(a, 9) ^ r;
    781 		a2 = EXTRACT(a, 10) ^ r;
    782 		a1 = q[a1];
    783 		a2 = q[a2];
    784 		new = new | INSERT(a1, 9) | INSERT(a2, 10);
    785 		a1 = EXTRACT(a, 11) ^ r;
    786 		a2 = EXTRACT(a, 12) ^ r;
    787 		a1 = q[a1];
    788 		a2 = q[a2];
    789 		new = new | INSERT(a1, 11) | INSERT(a2, 12);
    790 #endif				/* RF_LONGSHIFT > 2 */
    791 		d ^= new;
    792 		*dest++ = d;
    793 		length--;
    794 	}
    795 #endif				/* _KERNEL */
    796 }
    797 /*
    798    recover columns a and b from the given p and q into
    799    bufs abuf and bbuf. All bufs are word aligned.
    800    Length is in bytes.
    801 */
    802 
    803 
    804 /*
    805  * XXX
    806  *
    807  * Everything about this seems wrong.
    808  */
    809 void
    810 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
    811 	unsigned long *pbuf;
    812 	unsigned long *qbuf;
    813 	unsigned long *abuf;
    814 	unsigned long *bbuf;
    815 	unsigned length;
    816 	unsigned coeff_a;
    817 	unsigned coeff_b;
    818 {
    819 	unsigned long p, q, a, a0, a1;
    820 	int     col = (29 * coeff_a) + coeff_b;
    821 	unsigned char *q0 = &(rf_qinv[col][0]);
    822 
    823 	length /= 8;
    824 	while (length) {
    825 		p = *pbuf++;
    826 		q = *qbuf++;
    827 		a0 = EXTRACT(p, 0);
    828 		a1 = EXTRACT(q, 0);
    829 		a = q0[a0 << 5 | a1];
    830 #define MF(i) \
    831       a0 = EXTRACT(p,i); \
    832       a1 = EXTRACT(q,i); \
    833       a  = a | INSERT(q0[a0<<5 | a1],i)
    834 
    835 		MF(1);
    836 		MF(2);
    837 		MF(3);
    838 		MF(4);
    839 		MF(5);
    840 		MF(6);
    841 #if 0
    842 		MF(7);
    843 		MF(8);
    844 		MF(9);
    845 		MF(10);
    846 		MF(11);
    847 		MF(12);
    848 #endif				/* 0 */
    849 		*abuf++ = a;
    850 		*bbuf++ = a ^ p;
    851 		length--;
    852 	}
    853 }
    854 /*
    855    Lost parity and a data column. Recover that data column.
    856    Assume col coeff is lost. Let q the contents of Q after
    857    all surviving data columns have been q-xored out of it.
    858    Then we have the equation
    859 
    860    q[28-coeff][a_i ^ r_i+1] = q
    861 
    862    but q is cyclic with period 31.
    863    So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
    864       q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
    865 
    866    so a_i = r_{coeff+1} ^ q[3+coeff][q]
    867 
    868    The routine is passed q buffer and the buffer
    869    the data is to be recoverd into. They can be the same.
    870 */
    871 
    872 
    873 
    874 static void
    875 rf_InvertQ(
    876     unsigned long *qbuf,
    877     unsigned long *abuf,
    878     unsigned length,
    879     unsigned coeff)
    880 {
    881 	unsigned long a, new;
    882 	unsigned long a1, a2;
    883 	unsigned int *q = &(rf_qfor[3 + coeff][0]);
    884 	unsigned r = rf_rn[coeff + 1];
    885 
    886 	/* 13 5 bit quants in a 64 bit word */
    887 	length /= 8;
    888 	while (length) {
    889 		a = *qbuf++;
    890 		a1 = EXTRACT(a, 0);
    891 		a2 = EXTRACT(a, 1);
    892 		a1 = r ^ q[a1];
    893 		a2 = r ^ q[a2];
    894 		new = INSERT(a2, 1) | a1;
    895 #define M(i,j) \
    896       a1 = EXTRACT(a,i); \
    897       a2 = EXTRACT(a,j); \
    898       a1 = r ^ q[a1]; \
    899       a2 = r ^ q[a2]; \
    900       new = new | INSERT(a1,i) | INSERT(a2,j)
    901 
    902 		M(2, 3);
    903 		M(4, 5);
    904 		M(5, 6);
    905 #if RF_LONGSHIFT > 2
    906 		M(7, 8);
    907 		M(9, 10);
    908 		M(11, 12);
    909 #endif				/* RF_LONGSHIFT > 2 */
    910 		*abuf++ = new;
    911 		length--;
    912 	}
    913 }
    914 #endif				/* (RF_INCLUDE_DECL_PQ > 0) ||
    915 				 * (RF_INCLUDE_RAID6 > 0) */
    916