Home | History | Annotate | Line # | Download | only in raidframe
rf_pq.c revision 1.9.2.1
      1 /*	$NetBSD: rf_pq.c,v 1.9.2.1 2001/10/11 00:02:23 fvdl Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Daniel Stodolsky
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * Code for RAID level 6 (P + Q) disk array architecture.
     31  */
     32 
     33 #include "rf_archs.h"
     34 
     35 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
     36 
     37 #include <dev/raidframe/raidframevar.h>
     38 
     39 #include "rf_raid.h"
     40 #include "rf_dag.h"
     41 #include "rf_dagffrd.h"
     42 #include "rf_dagffwr.h"
     43 #include "rf_dagdegrd.h"
     44 #include "rf_dagdegwr.h"
     45 #include "rf_dagutils.h"
     46 #include "rf_dagfuncs.h"
     47 #include "rf_etimer.h"
     48 #include "rf_pqdeg.h"
     49 #include "rf_general.h"
     50 #include "rf_map.h"
     51 #include "rf_pq.h"
     52 
     53 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
     54 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
     55 
     56 int
     57 rf_RegularONPFunc(node)
     58 	RF_DagNode_t *node;
     59 {
     60 	return (rf_RegularXorFunc(node));
     61 }
     62 /*
     63    same as simpleONQ func, but the coefficient is always 1
     64 */
     65 
     66 int
     67 rf_SimpleONPFunc(node)
     68 	RF_DagNode_t *node;
     69 {
     70 	return (rf_SimpleXorFunc(node));
     71 }
     72 
     73 int
     74 rf_RecoveryPFunc(node)
     75 	RF_DagNode_t *node;
     76 {
     77 	return (rf_RecoveryXorFunc(node));
     78 }
     79 
     80 int
     81 rf_RegularPFunc(node)
     82 	RF_DagNode_t *node;
     83 {
     84 	return (rf_RegularXorFunc(node));
     85 }
     86 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
     87 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
     88 
     89 static void
     90 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
     91     unsigned char coeff);
     92 static void
     93 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
     94     unsigned length, unsigned coeff);
     95 
     96 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
     97 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
     98 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
     99 
    100 void
    101 rf_PQDagSelect(
    102     RF_Raid_t * raidPtr,
    103     RF_IoType_t type,
    104     RF_AccessStripeMap_t * asmap,
    105     RF_VoidFuncPtr * createFunc)
    106 {
    107 	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    108 	unsigned ndfail = asmap->numDataFailed;
    109 	unsigned npfail = asmap->numParityFailed;
    110 	unsigned ntfail = npfail + ndfail;
    111 
    112 	RF_ASSERT(RF_IO_IS_R_OR_W(type));
    113 	if (ntfail > 2) {
    114 		RF_ERRORMSG("more than two disks failed in a single group!  Aborting I/O operation.\n");
    115 		 /* *infoFunc = */ *createFunc = NULL;
    116 		return;
    117 	}
    118 	/* ok, we can do this I/O */
    119 	if (type == RF_IO_TYPE_READ) {
    120 		switch (ndfail) {
    121 		case 0:
    122 			/* fault free read */
    123 			*createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;	/* same as raid 5 */
    124 			break;
    125 		case 1:
    126 			/* lost a single data unit */
    127 			/* two cases: (1) parity is not lost. do a normal raid
    128 			 * 5 reconstruct read. (2) parity is lost. do a
    129 			 * reconstruct read using "q". */
    130 			if (ntfail == 2) {	/* also lost redundancy */
    131 				if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
    132 					*createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
    133 				else
    134 					*createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
    135 			} else {
    136 				/* P and Q are ok. But is there a failure in
    137 				 * some unaccessed data unit? */
    138 				if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
    139 					*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
    140 				else
    141 					*createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
    142 			}
    143 			break;
    144 		case 2:
    145 			/* lost two data units */
    146 			/* *infoFunc = PQOneTwo; */
    147 			*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
    148 			break;
    149 		}
    150 		return;
    151 	}
    152 	/* a write */
    153 	switch (ntfail) {
    154 	case 0:		/* fault free */
    155 		if (rf_suppressLocksAndLargeWrites ||
    156 		    (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
    157 			(asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
    158 
    159 			*createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
    160 		} else {
    161 			*createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
    162 		}
    163 		break;
    164 
    165 	case 1:		/* single disk fault */
    166 		if (npfail == 1) {
    167 			RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
    168 			if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {	/* q died, treat like
    169 										 * normal mode raid5
    170 										 * write. */
    171 				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
    172 				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
    173 					*createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
    174 				else
    175 					*createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
    176 			} else {/* parity died, small write only updating Q */
    177 				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
    178 				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
    179 					*createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
    180 				else
    181 					*createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
    182 			}
    183 		} else {	/* data missing. Do a P reconstruct write if
    184 				 * only a single data unit is lost in the
    185 				 * stripe, otherwise a PQ reconstruct write. */
    186 			if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
    187 				*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
    188 			else
    189 				*createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
    190 		}
    191 		break;
    192 
    193 	case 2:		/* two disk faults */
    194 		switch (npfail) {
    195 		case 2:	/* both p and q dead */
    196 			*createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
    197 			break;
    198 		case 1:	/* either p or q and dead data */
    199 			RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
    200 			RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
    201 			if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
    202 				*createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
    203 			else
    204 				*createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
    205 			break;
    206 		case 0:	/* double data loss */
    207 			*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
    208 			break;
    209 		}
    210 		break;
    211 
    212 	default:		/* more than 2 disk faults */
    213 		*createFunc = NULL;
    214 		RF_PANIC();
    215 	}
    216 	return;
    217 }
    218 /*
    219    Used as a stop gap info function
    220 */
    221 #if 0
    222 static void
    223 PQOne(raidPtr, nSucc, nAnte, asmap)
    224 	RF_Raid_t *raidPtr;
    225 	int    *nSucc;
    226 	int    *nAnte;
    227 	RF_AccessStripeMap_t *asmap;
    228 {
    229 	*nSucc = *nAnte = 1;
    230 }
    231 
    232 static void
    233 PQOneTwo(raidPtr, nSucc, nAnte, asmap)
    234 	RF_Raid_t *raidPtr;
    235 	int    *nSucc;
    236 	int    *nAnte;
    237 	RF_AccessStripeMap_t *asmap;
    238 {
    239 	*nSucc = 1;
    240 	*nAnte = 2;
    241 }
    242 #endif
    243 
    244 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
    245 {
    246 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
    247 	    rf_RegularPQFunc, RF_FALSE);
    248 }
    249 
    250 int
    251 rf_RegularONQFunc(node)
    252 	RF_DagNode_t *node;
    253 {
    254 	int     np = node->numParams;
    255 	int     d;
    256 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    257 	int     i;
    258 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    259 	RF_Etimer_t timer;
    260 	char   *qbuf, *qpbuf;
    261 	char   *obuf, *nbuf;
    262 	RF_PhysDiskAddr_t *old, *new;
    263 	unsigned long coeff;
    264 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    265 
    266 	RF_ETIMER_START(timer);
    267 
    268 	d = (np - 3) / 4;
    269 	RF_ASSERT(4 * d + 3 == np);
    270 	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
    271 	for (i = 0; i < d; i++) {
    272 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    273 		obuf = (char *) node->params[2 * i + 1].p;
    274 		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
    275 		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
    276 		RF_ASSERT(new->numSector == old->numSector);
    277 		RF_ASSERT(new->raidAddress == old->raidAddress);
    278 		/* the stripe unit within the stripe tells us the coefficient
    279 		 * to use for the multiply. */
    280 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
    281 		/* compute the data unit offset within the column, then add
    282 		 * one */
    283 		coeff = (coeff % raidPtr->Layout.numDataCol);
    284 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
    285 		QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    286 	}
    287 
    288 	RF_ETIMER_STOP(timer);
    289 	RF_ETIMER_EVAL(timer);
    290 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    291 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    292 					 * I/O in this node */
    293 	return (0);
    294 }
    295 /*
    296    See the SimpleXORFunc for the difference between a simple and regular func.
    297    These Q functions should be used for
    298 
    299          new q = Q(data,old data,old q)
    300 
    301    style updates and not for
    302 
    303          q = ( new data, new data, .... )
    304 
    305    computations.
    306 
    307    The simple q takes 2(2d+1)+1 params, where d is the number
    308    of stripes written. The order of params is
    309    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
    310    [2d] old q pda_0, old q buffer
    311    [2d_2] new data pda_0, new data buffer_0, ...                                    new data pda_d, new data buffer_d
    312    raidPtr
    313 */
    314 
    315 int
    316 rf_SimpleONQFunc(node)
    317 	RF_DagNode_t *node;
    318 {
    319 	int     np = node->numParams;
    320 	int     d;
    321 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    322 	int     i;
    323 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    324 	RF_Etimer_t timer;
    325 	char   *qbuf;
    326 	char   *obuf, *nbuf;
    327 	RF_PhysDiskAddr_t *old, *new;
    328 	unsigned long coeff;
    329 
    330 	RF_ETIMER_START(timer);
    331 
    332 	d = (np - 3) / 4;
    333 	RF_ASSERT(4 * d + 3 == np);
    334 	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
    335 	for (i = 0; i < d; i++) {
    336 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    337 		obuf = (char *) node->params[2 * i + 1].p;
    338 		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
    339 		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
    340 		RF_ASSERT(new->numSector == old->numSector);
    341 		RF_ASSERT(new->raidAddress == old->raidAddress);
    342 		/* the stripe unit within the stripe tells us the coefficient
    343 		 * to use for the multiply. */
    344 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
    345 		/* compute the data unit offset within the column, then add
    346 		 * one */
    347 		coeff = (coeff % raidPtr->Layout.numDataCol);
    348 		QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    349 	}
    350 
    351 	RF_ETIMER_STOP(timer);
    352 	RF_ETIMER_EVAL(timer);
    353 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    354 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    355 					 * I/O in this node */
    356 	return (0);
    357 }
    358 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
    359 {
    360 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
    361 }
    362 
    363 static void RegularQSubr(RF_DagNode_t *node, char   *qbuf);
    364 
    365 static void
    366 RegularQSubr(node, qbuf)
    367 	RF_DagNode_t *node;
    368 	char   *qbuf;
    369 {
    370 	int     np = node->numParams;
    371 	int     d;
    372 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    373 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    374 	int     i;
    375 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    376 	RF_Etimer_t timer;
    377 	char   *obuf, *qpbuf;
    378 	RF_PhysDiskAddr_t *old;
    379 	unsigned long coeff;
    380 
    381 	RF_ETIMER_START(timer);
    382 
    383 	d = (np - 1) / 2;
    384 	RF_ASSERT(2 * d + 1 == np);
    385 	for (i = 0; i < d; i++) {
    386 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    387 		obuf = (char *) node->params[2 * i + 1].p;
    388 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    389 		/* compute the data unit offset within the column, then add
    390 		 * one */
    391 		coeff = (coeff % raidPtr->Layout.numDataCol);
    392 		/* the input buffers may not all be aligned with the start of
    393 		 * the stripe. so shift by their sector offset within the
    394 		 * stripe unit */
    395 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
    396 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    397 	}
    398 
    399 	RF_ETIMER_STOP(timer);
    400 	RF_ETIMER_EVAL(timer);
    401 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    402 }
    403 /*
    404    used in degraded writes.
    405 */
    406 
    407 static void DegrQSubr(RF_DagNode_t *node);
    408 
    409 static void
    410 DegrQSubr(node)
    411 	RF_DagNode_t *node;
    412 {
    413 	int     np = node->numParams;
    414 	int     d;
    415 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    416 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    417 	int     i;
    418 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    419 	RF_Etimer_t timer;
    420 	char   *qbuf = node->results[1];
    421 	char   *obuf, *qpbuf;
    422 	RF_PhysDiskAddr_t *old;
    423 	unsigned long coeff;
    424 	unsigned fail_start;
    425 	int     j;
    426 
    427 	old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
    428 	fail_start = old->startSector % secPerSU;
    429 
    430 	RF_ETIMER_START(timer);
    431 
    432 	d = (np - 2) / 2;
    433 	RF_ASSERT(2 * d + 2 == np);
    434 	for (i = 0; i < d; i++) {
    435 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    436 		obuf = (char *) node->params[2 * i + 1].p;
    437 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    438 		/* compute the data unit offset within the column, then add
    439 		 * one */
    440 		coeff = (coeff % raidPtr->Layout.numDataCol);
    441 		/* the input buffers may not all be aligned with the start of
    442 		 * the stripe. so shift by their sector offset within the
    443 		 * stripe unit */
    444 		j = old->startSector % secPerSU;
    445 		RF_ASSERT(j >= fail_start);
    446 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
    447 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    448 	}
    449 
    450 	RF_ETIMER_STOP(timer);
    451 	RF_ETIMER_EVAL(timer);
    452 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    453 }
    454 /*
    455    Called by large write code to compute the new parity and the new q.
    456 
    457    structure of the params:
    458 
    459    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
    460    raidPtr
    461 
    462    for a total of 2d+1 arguments.
    463    The result buffers results[0], results[1] are the buffers for the p and q,
    464    respectively.
    465 
    466    We compute Q first, then compute P. The P calculation may try to reuse
    467    one of the input buffers for its output, so if we computed P first, we would
    468    corrupt the input for the q calculation.
    469 */
    470 
    471 int
    472 rf_RegularPQFunc(node)
    473 	RF_DagNode_t *node;
    474 {
    475 	RegularQSubr(node, node->results[1]);
    476 	return (rf_RegularXorFunc(node));	/* does the wakeup */
    477 }
    478 
    479 int
    480 rf_RegularQFunc(node)
    481 	RF_DagNode_t *node;
    482 {
    483 	/* Almost ... adjust Qsubr args */
    484 	RegularQSubr(node, node->results[0]);
    485 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
    486 					 * I/O in this node */
    487 	return (0);
    488 }
    489 /*
    490    Called by singly degraded write code to compute the new parity and the new q.
    491 
    492    structure of the params:
    493 
    494    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
    495    failedPDA raidPtr
    496 
    497    for a total of 2d+2 arguments.
    498    The result buffers results[0], results[1] are the buffers for the parity and q,
    499    respectively.
    500 
    501    We compute Q first, then compute parity. The parity calculation may try to reuse
    502    one of the input buffers for its output, so if we computed parity first, we would
    503    corrupt the input for the q calculation.
    504 
    505    We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
    506 */
    507 
    508 void
    509 rf_Degraded_100_PQFunc(node)
    510 	RF_DagNode_t *node;
    511 {
    512 	int     np = node->numParams;
    513 
    514 	RF_ASSERT(np >= 2);
    515 	DegrQSubr(node);
    516 	rf_RecoveryXorFunc(node);
    517 }
    518 
    519 
    520 /*
    521    The two below are used when reading a stripe with a single lost data unit.
    522    The parameters are
    523 
    524    pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
    525 
    526    and results[0] contains the data buffer. Which is originally zero-filled.
    527 
    528 */
    529 
    530 /* this Q func is used by the degraded-mode dag functions to recover lost data.
    531  * the second-to-last parameter is the PDA for the failed portion of the access.
    532  * the code here looks at this PDA and assumes that the xor target buffer is
    533  * equal in size to the number of sectors in the failed PDA.  It then uses
    534  * the other PDAs in the parameter list to determine where within the target
    535  * buffer the corresponding data should be xored.
    536  *
    537  * Recall the basic equation is
    538  *
    539  *     Q = ( data_1 + 2 * data_2 ... + k * data_k  ) mod 256
    540  *
    541  * so to recover data_j we need
    542  *
    543  *    J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
    544  *
    545  * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
    546  * copying Q into it. Then we need to do a table lookup to convert to solve
    547  *   data_j /= J
    548  *
    549  *
    550  */
    551 int
    552 rf_RecoveryQFunc(node)
    553 	RF_DagNode_t *node;
    554 {
    555 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    556 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    557 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    558 	int     i;
    559 	RF_PhysDiskAddr_t *pda;
    560 	RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    561 	char   *srcbuf, *destbuf;
    562 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    563 	RF_Etimer_t timer;
    564 	unsigned long coeff;
    565 
    566 	RF_ETIMER_START(timer);
    567 	/* start by copying Q into the buffer */
    568 	bcopy(node->params[node->numParams - 3].p, node->results[0],
    569 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
    570 	for (i = 0; i < node->numParams - 4; i += 2) {
    571 		RF_ASSERT(node->params[i + 1].p != node->results[0]);
    572 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    573 		srcbuf = (char *) node->params[i + 1].p;
    574 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    575 		destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    576 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
    577 		/* compute the data unit offset within the column */
    578 		coeff = (coeff % raidPtr->Layout.numDataCol);
    579 		rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
    580 	}
    581 	/* Do the nasty inversion now */
    582 	coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
    583 	rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
    584 	RF_ETIMER_STOP(timer);
    585 	RF_ETIMER_EVAL(timer);
    586 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    587 	rf_GenericWakeupFunc(node, 0);
    588 	return (0);
    589 }
    590 
    591 int
    592 rf_RecoveryPQFunc(node)
    593 	RF_DagNode_t *node;
    594 {
    595 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    596 	printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
    597 	return (1);
    598 }
    599 /*
    600    Degraded write Q subroutine.
    601    Used when P is dead.
    602    Large-write style Q computation.
    603    Parameters
    604 
    605    (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
    606 
    607    We ignore failedPDA.
    608 
    609    This is a "simple style" recovery func.
    610 */
    611 
    612 void
    613 rf_PQ_DegradedWriteQFunc(node)
    614 	RF_DagNode_t *node;
    615 {
    616 	int     np = node->numParams;
    617 	int     d;
    618 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
    619 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    620 	int     i;
    621 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    622 	RF_Etimer_t timer;
    623 	char   *qbuf = node->results[0];
    624 	char   *obuf, *qpbuf;
    625 	RF_PhysDiskAddr_t *old;
    626 	unsigned long coeff;
    627 	int     fail_start, j;
    628 
    629 	old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
    630 	fail_start = old->startSector % secPerSU;
    631 
    632 	RF_ETIMER_START(timer);
    633 
    634 	d = (np - 2) / 2;
    635 	RF_ASSERT(2 * d + 2 == np);
    636 
    637 	for (i = 0; i < d; i++) {
    638 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
    639 		obuf = (char *) node->params[2 * i + 1].p;
    640 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
    641 		/* compute the data unit offset within the column, then add
    642 		 * one */
    643 		coeff = (coeff % raidPtr->Layout.numDataCol);
    644 		j = old->startSector % secPerSU;
    645 		RF_ASSERT(j >= fail_start);
    646 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
    647 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
    648 	}
    649 
    650 	RF_ETIMER_STOP(timer);
    651 	RF_ETIMER_EVAL(timer);
    652 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
    653 	rf_GenericWakeupFunc(node, 0);
    654 }
    655 
    656 
    657 
    658 
    659 /* Q computations */
    660 
    661 /*
    662    coeff - colummn;
    663 
    664    compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
    665 
    666    on 5-bit basis;
    667    length in bytes;
    668 */
    669 
    670 void
    671 rf_IncQ(dest, buf, length, coeff)
    672 	unsigned long *dest;
    673 	unsigned long *buf;
    674 	unsigned length;
    675 	unsigned coeff;
    676 {
    677 	unsigned long a, d, new;
    678 	unsigned long a1, a2;
    679 	unsigned int *q = &(rf_qfor[28 - coeff][0]);
    680 	unsigned r = rf_rn[coeff + 1];
    681 
    682 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
    683 #define INSERT(a,i) (a << (5L*i))
    684 
    685 	length /= 8;
    686 	/* 13 5 bit quants in a 64 bit word */
    687 	while (length) {
    688 		a = *buf++;
    689 		d = *dest;
    690 		a1 = EXTRACT(a, 0) ^ r;
    691 		a2 = EXTRACT(a, 1) ^ r;
    692 		new = INSERT(a2, 1) | a1;
    693 		a1 = EXTRACT(a, 2) ^ r;
    694 		a2 = EXTRACT(a, 3) ^ r;
    695 		a1 = q[a1];
    696 		a2 = q[a2];
    697 		new = new | INSERT(a1, 2) | INSERT(a2, 3);
    698 		a1 = EXTRACT(a, 4) ^ r;
    699 		a2 = EXTRACT(a, 5) ^ r;
    700 		a1 = q[a1];
    701 		a2 = q[a2];
    702 		new = new | INSERT(a1, 4) | INSERT(a2, 5);
    703 		a1 = EXTRACT(a, 5) ^ r;
    704 		a2 = EXTRACT(a, 6) ^ r;
    705 		a1 = q[a1];
    706 		a2 = q[a2];
    707 		new = new | INSERT(a1, 5) | INSERT(a2, 6);
    708 #if RF_LONGSHIFT > 2
    709 		a1 = EXTRACT(a, 7) ^ r;
    710 		a2 = EXTRACT(a, 8) ^ r;
    711 		a1 = q[a1];
    712 		a2 = q[a2];
    713 		new = new | INSERT(a1, 7) | INSERT(a2, 8);
    714 		a1 = EXTRACT(a, 9) ^ r;
    715 		a2 = EXTRACT(a, 10) ^ r;
    716 		a1 = q[a1];
    717 		a2 = q[a2];
    718 		new = new | INSERT(a1, 9) | INSERT(a2, 10);
    719 		a1 = EXTRACT(a, 11) ^ r;
    720 		a2 = EXTRACT(a, 12) ^ r;
    721 		a1 = q[a1];
    722 		a2 = q[a2];
    723 		new = new | INSERT(a1, 11) | INSERT(a2, 12);
    724 #endif				/* RF_LONGSHIFT > 2 */
    725 		d ^= new;
    726 		*dest++ = d;
    727 		length--;
    728 	}
    729 }
    730 /*
    731    compute
    732 
    733    dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
    734 
    735    on a five bit basis.
    736    optimization: compute old ^ new on 64 bit basis.
    737 
    738    length in bytes.
    739 */
    740 
    741 static void
    742 QDelta(
    743     char *dest,
    744     char *obuf,
    745     char *nbuf,
    746     unsigned length,
    747     unsigned char coeff)
    748 {
    749 	unsigned long a, d, new;
    750 	unsigned long a1, a2;
    751 	unsigned int *q = &(rf_qfor[28 - coeff][0]);
    752 	unsigned int r = rf_rn[coeff + 1];
    753 
    754 	r = a1 = a2 = new = d = a = 0; /* XXX for now... */
    755 	q = NULL; /* XXX for now */
    756 
    757 #ifdef _KERNEL
    758 	/* PQ in kernel currently not supported because the encoding/decoding
    759 	 * table is not present */
    760 	memset(dest, 0, length);
    761 #else				/* KERNEL */
    762 	/* this code probably doesn't work and should be rewritten  -wvcii */
    763 	/* 13 5 bit quants in a 64 bit word */
    764 	length /= 8;
    765 	while (length) {
    766 		a = *obuf++;	/* XXX need to reorg to avoid cache conflicts */
    767 		a ^= *nbuf++;
    768 		d = *dest;
    769 		a1 = EXTRACT(a, 0) ^ r;
    770 		a2 = EXTRACT(a, 1) ^ r;
    771 		a1 = q[a1];
    772 		a2 = q[a2];
    773 		new = INSERT(a2, 1) | a1;
    774 		a1 = EXTRACT(a, 2) ^ r;
    775 		a2 = EXTRACT(a, 3) ^ r;
    776 		a1 = q[a1];
    777 		a2 = q[a2];
    778 		new = new | INSERT(a1, 2) | INSERT(a2, 3);
    779 		a1 = EXTRACT(a, 4) ^ r;
    780 		a2 = EXTRACT(a, 5) ^ r;
    781 		a1 = q[a1];
    782 		a2 = q[a2];
    783 		new = new | INSERT(a1, 4) | INSERT(a2, 5);
    784 		a1 = EXTRACT(a, 5) ^ r;
    785 		a2 = EXTRACT(a, 6) ^ r;
    786 		a1 = q[a1];
    787 		a2 = q[a2];
    788 		new = new | INSERT(a1, 5) | INSERT(a2, 6);
    789 #if RF_LONGSHIFT > 2
    790 		a1 = EXTRACT(a, 7) ^ r;
    791 		a2 = EXTRACT(a, 8) ^ r;
    792 		a1 = q[a1];
    793 		a2 = q[a2];
    794 		new = new | INSERT(a1, 7) | INSERT(a2, 8);
    795 		a1 = EXTRACT(a, 9) ^ r;
    796 		a2 = EXTRACT(a, 10) ^ r;
    797 		a1 = q[a1];
    798 		a2 = q[a2];
    799 		new = new | INSERT(a1, 9) | INSERT(a2, 10);
    800 		a1 = EXTRACT(a, 11) ^ r;
    801 		a2 = EXTRACT(a, 12) ^ r;
    802 		a1 = q[a1];
    803 		a2 = q[a2];
    804 		new = new | INSERT(a1, 11) | INSERT(a2, 12);
    805 #endif				/* RF_LONGSHIFT > 2 */
    806 		d ^= new;
    807 		*dest++ = d;
    808 		length--;
    809 	}
    810 #endif				/* _KERNEL */
    811 }
    812 /*
    813    recover columns a and b from the given p and q into
    814    bufs abuf and bbuf. All bufs are word aligned.
    815    Length is in bytes.
    816 */
    817 
    818 
    819 /*
    820  * XXX
    821  *
    822  * Everything about this seems wrong.
    823  */
    824 void
    825 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
    826 	unsigned long *pbuf;
    827 	unsigned long *qbuf;
    828 	unsigned long *abuf;
    829 	unsigned long *bbuf;
    830 	unsigned length;
    831 	unsigned coeff_a;
    832 	unsigned coeff_b;
    833 {
    834 	unsigned long p, q, a, a0, a1;
    835 	int     col = (29 * coeff_a) + coeff_b;
    836 	unsigned char *q0 = &(rf_qinv[col][0]);
    837 
    838 	length /= 8;
    839 	while (length) {
    840 		p = *pbuf++;
    841 		q = *qbuf++;
    842 		a0 = EXTRACT(p, 0);
    843 		a1 = EXTRACT(q, 0);
    844 		a = q0[a0 << 5 | a1];
    845 #define MF(i) \
    846       a0 = EXTRACT(p,i); \
    847       a1 = EXTRACT(q,i); \
    848       a  = a | INSERT(q0[a0<<5 | a1],i)
    849 
    850 		MF(1);
    851 		MF(2);
    852 		MF(3);
    853 		MF(4);
    854 		MF(5);
    855 		MF(6);
    856 #if 0
    857 		MF(7);
    858 		MF(8);
    859 		MF(9);
    860 		MF(10);
    861 		MF(11);
    862 		MF(12);
    863 #endif				/* 0 */
    864 		*abuf++ = a;
    865 		*bbuf++ = a ^ p;
    866 		length--;
    867 	}
    868 }
    869 /*
    870    Lost parity and a data column. Recover that data column.
    871    Assume col coeff is lost. Let q the contents of Q after
    872    all surviving data columns have been q-xored out of it.
    873    Then we have the equation
    874 
    875    q[28-coeff][a_i ^ r_i+1] = q
    876 
    877    but q is cyclic with period 31.
    878    So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
    879       q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
    880 
    881    so a_i = r_{coeff+1} ^ q[3+coeff][q]
    882 
    883    The routine is passed q buffer and the buffer
    884    the data is to be recoverd into. They can be the same.
    885 */
    886 
    887 
    888 
    889 static void
    890 rf_InvertQ(
    891     unsigned long *qbuf,
    892     unsigned long *abuf,
    893     unsigned length,
    894     unsigned coeff)
    895 {
    896 	unsigned long a, new;
    897 	unsigned long a1, a2;
    898 	unsigned int *q = &(rf_qfor[3 + coeff][0]);
    899 	unsigned r = rf_rn[coeff + 1];
    900 
    901 	/* 13 5 bit quants in a 64 bit word */
    902 	length /= 8;
    903 	while (length) {
    904 		a = *qbuf++;
    905 		a1 = EXTRACT(a, 0);
    906 		a2 = EXTRACT(a, 1);
    907 		a1 = r ^ q[a1];
    908 		a2 = r ^ q[a2];
    909 		new = INSERT(a2, 1) | a1;
    910 #define M(i,j) \
    911       a1 = EXTRACT(a,i); \
    912       a2 = EXTRACT(a,j); \
    913       a1 = r ^ q[a1]; \
    914       a2 = r ^ q[a2]; \
    915       new = new | INSERT(a1,i) | INSERT(a2,j)
    916 
    917 		M(2, 3);
    918 		M(4, 5);
    919 		M(5, 6);
    920 #if RF_LONGSHIFT > 2
    921 		M(7, 8);
    922 		M(9, 10);
    923 		M(11, 12);
    924 #endif				/* RF_LONGSHIFT > 2 */
    925 		*abuf++ = new;
    926 		length--;
    927 	}
    928 }
    929 #endif				/* (RF_INCLUDE_DECL_PQ > 0) ||
    930 				 * (RF_INCLUDE_RAID6 > 0) */
    931