Home | History | Annotate | Line # | Download | only in raidframe
rf_pq.c revision 1.5
      1 /*	$NetBSD: rf_pq.c,v 1.5 1999/08/15 02:36:40 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Daniel Stodolsky
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID level 6 (P + Q) disk array architecture.
     31  */
     32 
     33 #include "rf_archs.h"
     34 #include "rf_types.h"
     35 #include "rf_raid.h"
     36 #include "rf_dag.h"
     37 #include "rf_dagffrd.h"
     38 #include "rf_dagffwr.h"
     39 #include "rf_dagdegrd.h"
     40 #include "rf_dagdegwr.h"
     41 #include "rf_dagutils.h"
     42 #include "rf_dagfuncs.h"
     43 #include "rf_threadid.h"
     44 #include "rf_etimer.h"
     45 #include "rf_pqdeg.h"
     46 #include "rf_general.h"
     47 #include "rf_map.h"
     48 #include "rf_pq.h"
     49 
     50 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
     51 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
     52 
     53 int
     54 rf_RegularONPFunc(node)
     55 	RF_DagNode_t *node;
     56 {
     57 	return (rf_RegularXorFunc(node));
     58 }
     59 /*
     60    same as simpleONQ func, but the coefficient is always 1
     61 */
     62 
     63 int
     64 rf_SimpleONPFunc(node)
     65 	RF_DagNode_t *node;
     66 {
     67 	return (rf_SimpleXorFunc(node));
     68 }
     69 
     70 int
     71 rf_RecoveryPFunc(node)
     72 	RF_DagNode_t *node;
     73 {
     74 	return (rf_RecoveryXorFunc(node));
     75 }
     76 
     77 int
     78 rf_RegularPFunc(node)
     79 	RF_DagNode_t *node;
     80 {
     81 	return (rf_RegularXorFunc(node));
     82 }
     83 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
     84 
     85 static void
     86 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
     87     unsigned char coeff);
     88 static void
     89 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
     90     unsigned length, unsigned coeff);
     91 
     92 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
     93 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
     94 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
     95 
     96 void
     97 rf_PQDagSelect(
     98     RF_Raid_t * raidPtr,
     99     RF_IoType_t type,
    100     RF_AccessStripeMap_t * asmap,
    101     RF_VoidFuncPtr * createFunc)
    102 {
    103 	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    104 	unsigned ndfail = asmap->numDataFailed;
    105 	unsigned npfail = asmap->numParityFailed;
    106 	unsigned ntfail = npfail + ndfail;
    107 
    108 	RF_ASSERT(RF_IO_IS_R_OR_W(type));
    109 	if (ntfail > 2) {
    110 		RF_ERRORMSG("more than two disks failed in a single group!  Aborting I/O operation.\n");
    111 		 /* *infoFunc = */ *createFunc = NULL;
    112 		return;
    113 	}
    114 	/* ok, we can do this I/O */
    115 	if (type == RF_IO_TYPE_READ) {
    116 		switch (ndfail) {
    117 		case 0:
    118 			/* fault free read */
    119 			*createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;	/* same as raid 5 */
    120 			break;
    121 		case 1:
    122 			/* lost a single data unit */
    123 			/* two cases: (1) parity is not lost. do a normal raid
    124 			 * 5 reconstruct read. (2) parity is lost. do a
    125 			 * reconstruct read using "q". */
    126 			if (ntfail == 2) {	/* also lost redundancy */
    127 				if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
    128 					*createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
    129 				else
    130 					*createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
    131 			} else {
    132 				/* P and Q are ok. But is there a failure in
    133 				 * some unaccessed data unit? */
    134 				if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
    135 					*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
    136 				else
    137 					*createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
    138 			}
    139 			break;
    140 		case 2:
    141 			/* lost two data units */
    142 			/* *infoFunc = PQOneTwo; */
    143 			*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
    144 			break;
    145 		}
    146 		return;
    147 	}
    148 	/* a write */
    149 	switch (ntfail) {
    150 	case 0:		/* fault free */
    151 		if (rf_suppressLocksAndLargeWrites ||
    152 		    (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
    153 			(asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
    154 
    155 			*createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
    156 		} else {
    157 			*createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
    158 		}
    159 		break;
    160 
    161 	case 1:		/* single disk fault */
    162 		if (npfail == 1) {
    163 			RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
    164 			if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {	/* q died, treat like
    165 										 * normal mode raid5
    166 										 * write. */
    167 				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
    168 				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
    169 					*createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
    170 				else
    171 					*createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
    172 			} else {/* parity died, small write only updating Q */
    173 				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
    174 				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
    175 					*createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
    176 				else
    177 					*createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
    178 			}
    179 		} else {	/* data missing. Do a P reconstruct write if
    180 				 * only a single data unit is lost in the
    181 				 * stripe, otherwise a PQ reconstruct write. */
    182 			if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
    183 				*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
    184 			else
    185 				*createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
    186 		}
    187 		break;
    188 
    189 	case 2:		/* two disk faults */
    190 		switch (npfail) {
    191 		case 2:	/* both p and q dead */
    192 			*createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
    193 			break;
    194 		case 1:	/* either p or q and dead data */
    195 			RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
    196 			RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
    197 			if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
    198 				*createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
    199 			else
    200 				*createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
    201 			break;
    202 		case 0:	/* double data loss */
    203 			*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
    204 			break;
    205 		}
    206 		break;
    207 
    208 	default:		/* more than 2 disk faults */
    209 		*createFunc = NULL;
    210 		RF_PANIC();
    211 	}
    212 	return;
    213 }
    214 /*
    215    Used as a stop gap info function
    216 */
#if 0
/* Disabled stop-gap info function: one successor, one antecedent. */
static void
PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nAnte = 1;
	*nSucc = *nAnte;
}

/* Disabled stop-gap info function: one successor, two antecedents. */
static void
PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nSucc = 1;
	*nAnte = 2;
}
#endif
    239 
    240 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
    241 {
    242 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
    243 	    rf_RegularPQFunc, RF_FALSE);
    244 }
    245 
    246 int
    247 rf_RegularONQFunc(node)
    248 	RF_DagNode_t *node;
    249 {
    250 	int     np = node->numParams;
    251 	int     d;
    252 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    253 	int     i;
    254 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    255 	RF_Etimer_t timer;
    256 	char   *qbuf, *qpbuf;
    257 	char   *obuf, *nbuf;
    258 	RF_PhysDiskAddr_t *old, *new;
    259 	unsigned long coeff;
    260 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    261 
    262 	RF_ETIMER_START(timer);
    263 
    264 	d = (np - 3) / 4;
    265 	RF_ASSERT(4 * d + 3 == np);
    266 	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
    267 	for (i = 0; i < d; i++) {
    268 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    269 		obuf = (char *) node->params[2 * i + 1].p;
    270 		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
    271 		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
    272 		RF_ASSERT(new->numSector == old->numSector);
    273 		RF_ASSERT(new->raidAddress == old->raidAddress);
    274 		/* the stripe unit within the stripe tells us the coefficient
    275 		 * to use for the multiply. */
    276 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
    277 		/* compute the data unit offset within the column, then add
    278 		 * one */
    279 		coeff = (coeff % raidPtr->Layout.numDataCol);
    280 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
    281 		QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    282 	}
    283 
    284 	RF_ETIMER_STOP(timer);
    285 	RF_ETIMER_EVAL(timer);
    286 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    287 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    288 					 * I/O in this node */
    289 	return (0);
    290 }
    291 /*
    292    See the SimpleXORFunc for the difference between a simple and regular func.
    293    These Q functions should be used for
    294 
    295          new q = Q(data,old data,old q)
    296 
    297    style updates and not for
    298 
    299          q = ( new data, new data, .... )
    300 
    301    computations.
    302 
    303    The simple q takes 2(2d+1)+1 params, where d is the number
    304    of stripes written. The order of params is
    305    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
    306    [2d] old q pda_0, old q buffer
    307    [2d+2] new data pda_0, new data buffer_0, ...                                    new data pda_d, new data buffer_d
    308    raidPtr
    309 */
    310 
    311 int
    312 rf_SimpleONQFunc(node)
    313 	RF_DagNode_t *node;
    314 {
    315 	int     np = node->numParams;
    316 	int     d;
    317 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    318 	int     i;
    319 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    320 	RF_Etimer_t timer;
    321 	char   *qbuf;
    322 	char   *obuf, *nbuf;
    323 	RF_PhysDiskAddr_t *old, *new;
    324 	unsigned long coeff;
    325 
    326 	RF_ETIMER_START(timer);
    327 
    328 	d = (np - 3) / 4;
    329 	RF_ASSERT(4 * d + 3 == np);
    330 	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
    331 	for (i = 0; i < d; i++) {
    332 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    333 		obuf = (char *) node->params[2 * i + 1].p;
    334 		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
    335 		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
    336 		RF_ASSERT(new->numSector == old->numSector);
    337 		RF_ASSERT(new->raidAddress == old->raidAddress);
    338 		/* the stripe unit within the stripe tells us the coefficient
    339 		 * to use for the multiply. */
    340 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
    341 		/* compute the data unit offset within the column, then add
    342 		 * one */
    343 		coeff = (coeff % raidPtr->Layout.numDataCol);
    344 		QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    345 	}
    346 
    347 	RF_ETIMER_STOP(timer);
    348 	RF_ETIMER_EVAL(timer);
    349 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    350 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    351 					 * I/O in this node */
    352 	return (0);
    353 }
    354 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
    355 {
    356 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
    357 }
    358 
    359 static void RegularQSubr(RF_DagNode_t *node, char   *qbuf);
    360 
    361 static void
    362 RegularQSubr(node, qbuf)
    363 	RF_DagNode_t *node;
    364 	char   *qbuf;
    365 {
    366 	int     np = node->numParams;
    367 	int     d;
    368 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    369 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    370 	int     i;
    371 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    372 	RF_Etimer_t timer;
    373 	char   *obuf, *qpbuf;
    374 	RF_PhysDiskAddr_t *old;
    375 	unsigned long coeff;
    376 
    377 	RF_ETIMER_START(timer);
    378 
    379 	d = (np - 1) / 2;
    380 	RF_ASSERT(2 * d + 1 == np);
    381 	for (i = 0; i < d; i++) {
    382 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    383 		obuf = (char *) node->params[2 * i + 1].p;
    384 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    385 		/* compute the data unit offset within the column, then add
    386 		 * one */
    387 		coeff = (coeff % raidPtr->Layout.numDataCol);
    388 		/* the input buffers may not all be aligned with the start of
    389 		 * the stripe. so shift by their sector offset within the
    390 		 * stripe unit */
    391 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
    392 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    393 	}
    394 
    395 	RF_ETIMER_STOP(timer);
    396 	RF_ETIMER_EVAL(timer);
    397 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    398 }
    399 /*
    400    used in degraded writes.
    401 */
    402 
    403 static void DegrQSubr(RF_DagNode_t *node);
    404 
    405 static void
    406 DegrQSubr(node)
    407 	RF_DagNode_t *node;
    408 {
    409 	int     np = node->numParams;
    410 	int     d;
    411 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    412 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    413 	int     i;
    414 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    415 	RF_Etimer_t timer;
    416 	char   *qbuf = node->results[1];
    417 	char   *obuf, *qpbuf;
    418 	RF_PhysDiskAddr_t *old;
    419 	unsigned long coeff;
    420 	unsigned fail_start;
    421 	int     j;
    422 
    423 	old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
    424 	fail_start = old->startSector % secPerSU;
    425 
    426 	RF_ETIMER_START(timer);
    427 
    428 	d = (np - 2) / 2;
    429 	RF_ASSERT(2 * d + 2 == np);
    430 	for (i = 0; i < d; i++) {
    431 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    432 		obuf = (char *) node->params[2 * i + 1].p;
    433 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    434 		/* compute the data unit offset within the column, then add
    435 		 * one */
    436 		coeff = (coeff % raidPtr->Layout.numDataCol);
    437 		/* the input buffers may not all be aligned with the start of
    438 		 * the stripe. so shift by their sector offset within the
    439 		 * stripe unit */
    440 		j = old->startSector % secPerSU;
    441 		RF_ASSERT(j >= fail_start);
    442 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
    443 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    444 	}
    445 
    446 	RF_ETIMER_STOP(timer);
    447 	RF_ETIMER_EVAL(timer);
    448 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    449 }
    450 /*
    451    Called by large write code to compute the new parity and the new q.
    452 
    453    structure of the params:
    454 
    455    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol )
    456    raidPtr
    457 
    458    for a total of 2d+1 arguments.
    459    The result buffers results[0], results[1] are the buffers for the p and q,
    460    respectively.
    461 
    462    We compute Q first, then compute P. The P calculation may try to reuse
    463    one of the input buffers for its output, so if we computed P first, we would
    464    corrupt the input for the q calculation.
    465 */
    466 
    467 int
    468 rf_RegularPQFunc(node)
    469 	RF_DagNode_t *node;
    470 {
    471 	RegularQSubr(node, node->results[1]);
    472 	return (rf_RegularXorFunc(node));	/* does the wakeup */
    473 }
    474 
    475 int
    476 rf_RegularQFunc(node)
    477 	RF_DagNode_t *node;
    478 {
    479 	/* Almost ... adjust Qsubr args */
    480 	RegularQSubr(node, node->results[0]);
    481 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    482 					 * I/O in this node */
    483 	return (0);
    484 }
    485 /*
    486    Called by singly degraded write code to compute the new parity and the new q.
    487 
    488    structure of the params:
    489 
    490    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
    491    failedPDA raidPtr
    492 
    493    for a total of 2d+2 arguments.
    494    The result buffers results[0], results[1] are the buffers for the parity and q,
    495    respectively.
    496 
    497    We compute Q first, then compute parity. The parity calculation may try to reuse
    498    one of the input buffers for its output, so if we computed parity first, we would
    499    corrupt the input for the q calculation.
    500 
    501    We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
    502 */
    503 
    504 void
    505 rf_Degraded_100_PQFunc(node)
    506 	RF_DagNode_t *node;
    507 {
    508 	int     np = node->numParams;
    509 
    510 	RF_ASSERT(np >= 2);
    511 	DegrQSubr(node);
    512 	rf_RecoveryXorFunc(node);
    513 }
    514 
    515 
    516 /*
    517    The two below are used when reading a stripe with a single lost data unit.
    518    The parameters are
    519 
    520    pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
    521 
    522    and results[0] contains the data buffer. Which is originally zero-filled.
    523 
    524 */
    525 
    526 /* this Q func is used by the degraded-mode dag functions to recover lost data.
    527  * the second-to-last parameter is the PDA for the failed portion of the access.
    528  * the code here looks at this PDA and assumes that the xor target buffer is
    529  * equal in size to the number of sectors in the failed PDA.  It then uses
    530  * the other PDAs in the parameter list to determine where within the target
    531  * buffer the corresponding data should be xored.
    532  *
    533  * Recall the basic equation is
    534  *
    535  *     Q = ( data_1 + 2 * data_2 ... + k * data_k  ) mod 256
    536  *
    537  * so to recover data_j we need
    538  *
    539  *    J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
    540  *
    541  * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
    542  * copying Q into it. Then we need to do a table lookup to convert to solve
    543  *   data_j /= J
    544  *
    545  *
    546  */
    547 int
    548 rf_RecoveryQFunc(node)
    549 	RF_DagNode_t *node;
    550 {
    551 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    552 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    553 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    554 	int     i;
    555 	RF_PhysDiskAddr_t *pda;
    556 	RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    557 	char   *srcbuf, *destbuf;
    558 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    559 	RF_Etimer_t timer;
    560 	unsigned long coeff;
    561 
    562 	RF_ETIMER_START(timer);
    563 	/* start by copying Q into the buffer */
    564 	bcopy(node->params[node->numParams - 3].p, node->results[0],
    565 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    566 	for (i = 0; i < node->numParams - 4; i += 2) {
    567 		RF_ASSERT(node->params[i + 1].p != node->results[0]);
    568 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    569 		srcbuf = (char *) node->params[i + 1].p;
    570 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    571 		destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    572 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
    573 		/* compute the data unit offset within the column */
    574 		coeff = (coeff % raidPtr->Layout.numDataCol);
    575 		rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
    576 	}
    577 	/* Do the nasty inversion now */
    578 	coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
    579 	rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
    580 	RF_ETIMER_STOP(timer);
    581 	RF_ETIMER_EVAL(timer);
    582 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    583 	rf_GenericWakeupFunc(node, 0);
    584 	return (0);
    585 }
    586 
    587 int
    588 rf_RecoveryPQFunc(node)
    589 	RF_DagNode_t *node;
    590 {
    591 	RF_PANIC();
    592 	return (1);
    593 }
    594 /*
    595    Degraded write Q subroutine.
    596    Used when P is dead.
    597    Large-write style Q computation.
    598    Parameters
    599 
    600    (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
    601 
    602    We ignore failedPDA.
    603 
    604    This is a "simple style" recovery func.
    605 */
    606 
    607 void
    608 rf_PQ_DegradedWriteQFunc(node)
    609 	RF_DagNode_t *node;
    610 {
    611 	int     np = node->numParams;
    612 	int     d;
    613 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    614 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    615 	int     i;
    616 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    617 	RF_Etimer_t timer;
    618 	char   *qbuf = node->results[0];
    619 	char   *obuf, *qpbuf;
    620 	RF_PhysDiskAddr_t *old;
    621 	unsigned long coeff;
    622 	int     fail_start, j;
    623 
    624 	old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
    625 	fail_start = old->startSector % secPerSU;
    626 
    627 	RF_ETIMER_START(timer);
    628 
    629 	d = (np - 2) / 2;
    630 	RF_ASSERT(2 * d + 2 == np);
    631 
    632 	for (i = 0; i < d; i++) {
    633 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    634 		obuf = (char *) node->params[2 * i + 1].p;
    635 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    636 		/* compute the data unit offset within the column, then add
    637 		 * one */
    638 		coeff = (coeff % raidPtr->Layout.numDataCol);
    639 		j = old->startSector % secPerSU;
    640 		RF_ASSERT(j >= fail_start);
    641 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
    642 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    643 	}
    644 
    645 	RF_ETIMER_STOP(timer);
    646 	RF_ETIMER_EVAL(timer);
    647 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    648 	rf_GenericWakeupFunc(node, 0);
    649 }
    650 
    651 
    652 
    653 
    654 /* Q computations */
    655 
    656 /*
    657    coeff - column;
    658 
    659    compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
    660 
    661    on 5-bit basis;
    662    length in bytes;
    663 */
    664 
    665 void
    666 rf_IncQ(dest, buf, length, coeff)
    667 	unsigned long *dest;
    668 	unsigned long *buf;
    669 	unsigned length;
    670 	unsigned coeff;
    671 {
    672 	unsigned long a, d, new;
    673 	unsigned long a1, a2;
    674 	unsigned int *q = &(rf_qfor[28 - coeff][0]);
    675 	unsigned r = rf_rn[coeff + 1];
    676 
    677 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
    678 #define INSERT(a,i) (a << (5L*i))
    679 
    680 	length /= 8;
    681 	/* 13 5 bit quants in a 64 bit word */
    682 	while (length) {
    683 		a = *buf++;
    684 		d = *dest;
    685 		a1 = EXTRACT(a, 0) ^ r;
    686 		a2 = EXTRACT(a, 1) ^ r;
    687 		new = INSERT(a2, 1) | a1;
    688 		a1 = EXTRACT(a, 2) ^ r;
    689 		a2 = EXTRACT(a, 3) ^ r;
    690 		a1 = q[a1];
    691 		a2 = q[a2];
    692 		new = new | INSERT(a1, 2) | INSERT(a2, 3);
    693 		a1 = EXTRACT(a, 4) ^ r;
    694 		a2 = EXTRACT(a, 5) ^ r;
    695 		a1 = q[a1];
    696 		a2 = q[a2];
    697 		new = new | INSERT(a1, 4) | INSERT(a2, 5);
    698 		a1 = EXTRACT(a, 5) ^ r;
    699 		a2 = EXTRACT(a, 6) ^ r;
    700 		a1 = q[a1];
    701 		a2 = q[a2];
    702 		new = new | INSERT(a1, 5) | INSERT(a2, 6);
    703 #if RF_LONGSHIFT > 2
    704 		a1 = EXTRACT(a, 7) ^ r;
    705 		a2 = EXTRACT(a, 8) ^ r;
    706 		a1 = q[a1];
    707 		a2 = q[a2];
    708 		new = new | INSERT(a1, 7) | INSERT(a2, 8);
    709 		a1 = EXTRACT(a, 9) ^ r;
    710 		a2 = EXTRACT(a, 10) ^ r;
    711 		a1 = q[a1];
    712 		a2 = q[a2];
    713 		new = new | INSERT(a1, 9) | INSERT(a2, 10);
    714 		a1 = EXTRACT(a, 11) ^ r;
    715 		a2 = EXTRACT(a, 12) ^ r;
    716 		a1 = q[a1];
    717 		a2 = q[a2];
    718 		new = new | INSERT(a1, 11) | INSERT(a2, 12);
    719 #endif				/* RF_LONGSHIFT > 2 */
    720 		d ^= new;
    721 		*dest++ = d;
    722 		length--;
    723 	}
    724 }
    725 /*
    726    compute
    727 
    728    dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
    729 
    730    on a five bit basis.
    731    optimization: compute old ^ new on 64 bit basis.
    732 
    733    length in bytes.
    734 */
    735 
    736 static void
    737 QDelta(
    738     char *dest,
    739     char *obuf,
    740     char *nbuf,
    741     unsigned length,
    742     unsigned char coeff)
    743 {
    744 	unsigned long a, d, new;
    745 	unsigned long a1, a2;
    746 	unsigned int *q = &(rf_qfor[28 - coeff][0]);
    747 	unsigned int r = rf_rn[coeff + 1];
    748 
    749 	r = a1 = a2 = new = d = a = 0; /* XXX for now... */
    750 	q = NULL; /* XXX for now */
    751 
    752 #ifdef _KERNEL
    753 	/* PQ in kernel currently not supported because the encoding/decoding
    754 	 * table is not present */
    755 	bzero(dest, length);
    756 #else				/* KERNEL */
    757 	/* this code probably doesn't work and should be rewritten  -wvcii */
    758 	/* 13 5 bit quants in a 64 bit word */
    759 	length /= 8;
    760 	while (length) {
    761 		a = *obuf++;	/* XXX need to reorg to avoid cache conflicts */
    762 		a ^= *nbuf++;
    763 		d = *dest;
    764 		a1 = EXTRACT(a, 0) ^ r;
    765 		a2 = EXTRACT(a, 1) ^ r;
    766 		a1 = q[a1];
    767 		a2 = q[a2];
    768 		new = INSERT(a2, 1) | a1;
    769 		a1 = EXTRACT(a, 2) ^ r;
    770 		a2 = EXTRACT(a, 3) ^ r;
    771 		a1 = q[a1];
    772 		a2 = q[a2];
    773 		new = new | INSERT(a1, 2) | INSERT(a2, 3);
    774 		a1 = EXTRACT(a, 4) ^ r;
    775 		a2 = EXTRACT(a, 5) ^ r;
    776 		a1 = q[a1];
    777 		a2 = q[a2];
    778 		new = new | INSERT(a1, 4) | INSERT(a2, 5);
    779 		a1 = EXTRACT(a, 5) ^ r;
    780 		a2 = EXTRACT(a, 6) ^ r;
    781 		a1 = q[a1];
    782 		a2 = q[a2];
    783 		new = new | INSERT(a1, 5) | INSERT(a2, 6);
    784 #if RF_LONGSHIFT > 2
    785 		a1 = EXTRACT(a, 7) ^ r;
    786 		a2 = EXTRACT(a, 8) ^ r;
    787 		a1 = q[a1];
    788 		a2 = q[a2];
    789 		new = new | INSERT(a1, 7) | INSERT(a2, 8);
    790 		a1 = EXTRACT(a, 9) ^ r;
    791 		a2 = EXTRACT(a, 10) ^ r;
    792 		a1 = q[a1];
    793 		a2 = q[a2];
    794 		new = new | INSERT(a1, 9) | INSERT(a2, 10);
    795 		a1 = EXTRACT(a, 11) ^ r;
    796 		a2 = EXTRACT(a, 12) ^ r;
    797 		a1 = q[a1];
    798 		a2 = q[a2];
    799 		new = new | INSERT(a1, 11) | INSERT(a2, 12);
    800 #endif				/* RF_LONGSHIFT > 2 */
    801 		d ^= new;
    802 		*dest++ = d;
    803 		length--;
    804 	}
    805 #endif				/* _KERNEL */
    806 }
    807 /*
    808    recover columns a and b from the given p and q into
    809    bufs abuf and bbuf. All bufs are word aligned.
    810    Length is in bytes.
    811 */
    812 
    813 
    814 /*
    815  * XXX
    816  *
    817  * Everything about this seems wrong.
    818  */
    819 void
    820 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
    821 	unsigned long *pbuf;
    822 	unsigned long *qbuf;
    823 	unsigned long *abuf;
    824 	unsigned long *bbuf;
    825 	unsigned length;
    826 	unsigned coeff_a;
    827 	unsigned coeff_b;
    828 {
    829 	unsigned long p, q, a, a0, a1;
    830 	int     col = (29 * coeff_a) + coeff_b;
    831 	unsigned char *q0 = &(rf_qinv[col][0]);
    832 
    833 	length /= 8;
    834 	while (length) {
    835 		p = *pbuf++;
    836 		q = *qbuf++;
    837 		a0 = EXTRACT(p, 0);
    838 		a1 = EXTRACT(q, 0);
    839 		a = q0[a0 << 5 | a1];
    840 #define MF(i) \
    841       a0 = EXTRACT(p,i); \
    842       a1 = EXTRACT(q,i); \
    843       a  = a | INSERT(q0[a0<<5 | a1],i)
    844 
    845 		MF(1);
    846 		MF(2);
    847 		MF(3);
    848 		MF(4);
    849 		MF(5);
    850 		MF(6);
    851 #if 0
    852 		MF(7);
    853 		MF(8);
    854 		MF(9);
    855 		MF(10);
    856 		MF(11);
    857 		MF(12);
    858 #endif				/* 0 */
    859 		*abuf++ = a;
    860 		*bbuf++ = a ^ p;
    861 		length--;
    862 	}
    863 }
    864 /*
    865    Lost parity and a data column. Recover that data column.
    866    Assume col coeff is lost. Let q the contents of Q after
    867    all surviving data columns have been q-xored out of it.
    868    Then we have the equation
    869 
    870    q[28-coeff][a_i ^ r_i+1] = q
    871 
    872    but q is cyclic with period 31.
    873    So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
    874       q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
    875 
    876    so a_i = r_{coeff+1} ^ q[3+coeff][q]
    877 
    878    The routine is passed q buffer and the buffer
    879    the data is to be recovered into. They can be the same.
    880 */
    881 
    882 
    883 
    884 static void
    885 rf_InvertQ(
    886     unsigned long *qbuf,
    887     unsigned long *abuf,
    888     unsigned length,
    889     unsigned coeff)
    890 {
    891 	unsigned long a, new;
    892 	unsigned long a1, a2;
    893 	unsigned int *q = &(rf_qfor[3 + coeff][0]);
    894 	unsigned r = rf_rn[coeff + 1];
    895 
    896 	/* 13 5 bit quants in a 64 bit word */
    897 	length /= 8;
    898 	while (length) {
    899 		a = *qbuf++;
    900 		a1 = EXTRACT(a, 0);
    901 		a2 = EXTRACT(a, 1);
    902 		a1 = r ^ q[a1];
    903 		a2 = r ^ q[a2];
    904 		new = INSERT(a2, 1) | a1;
    905 #define M(i,j) \
    906       a1 = EXTRACT(a,i); \
    907       a2 = EXTRACT(a,j); \
    908       a1 = r ^ q[a1]; \
    909       a2 = r ^ q[a2]; \
    910       new = new | INSERT(a1,i) | INSERT(a2,j)
    911 
    912 		M(2, 3);
    913 		M(4, 5);
    914 		M(5, 6);
    915 #if RF_LONGSHIFT > 2
    916 		M(7, 8);
    917 		M(9, 10);
    918 		M(11, 12);
    919 #endif				/* RF_LONGSHIFT > 2 */
    920 		*abuf++ = new;
    921 		length--;
    922 	}
    923 }
    924 #endif				/* (RF_INCLUDE_DECL_PQ > 0) ||
    925 				 * (RF_INCLUDE_RAID6 > 0) */
    926