/*	$NetBSD: rf_pq.c,v 1.6 1999/08/15 03:44:46 oster Exp $	*/
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * Code for RAID level 6 (P + Q) disk array architecture.
31 */
32
33 #include "rf_archs.h"
34 #include "rf_types.h"
35 #include "rf_raid.h"
36 #include "rf_dag.h"
37 #include "rf_dagffrd.h"
38 #include "rf_dagffwr.h"
39 #include "rf_dagdegrd.h"
40 #include "rf_dagdegwr.h"
41 #include "rf_dagutils.h"
42 #include "rf_dagfuncs.h"
43 #include "rf_threadid.h"
44 #include "rf_etimer.h"
45 #include "rf_pqdeg.h"
46 #include "rf_general.h"
47 #include "rf_map.h"
48 #include "rf_pq.h"
49
50 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
51 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
52
53 int
54 rf_RegularONPFunc(node)
55 RF_DagNode_t *node;
56 {
57 return (rf_RegularXorFunc(node));
58 }
59 /*
60 same as simpleONQ func, but the coefficient is always 1
61 */
62
63 int
64 rf_SimpleONPFunc(node)
65 RF_DagNode_t *node;
66 {
67 return (rf_SimpleXorFunc(node));
68 }
69
70 int
71 rf_RecoveryPFunc(node)
72 RF_DagNode_t *node;
73 {
74 return (rf_RecoveryXorFunc(node));
75 }
76
77 int
78 rf_RegularPFunc(node)
79 RF_DagNode_t *node;
80 {
81 return (rf_RegularXorFunc(node));
82 }
83 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
84
85 static void
86 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
87 unsigned char coeff);
88 static void
89 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
90 unsigned length, unsigned coeff);
91
92 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
93 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
94 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
95
96 void
97 rf_PQDagSelect(
98 RF_Raid_t * raidPtr,
99 RF_IoType_t type,
100 RF_AccessStripeMap_t * asmap,
101 RF_VoidFuncPtr * createFunc)
102 {
103 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
104 unsigned ndfail = asmap->numDataFailed;
105 unsigned npfail = asmap->numParityFailed;
106 unsigned ntfail = npfail + ndfail;
107
108 RF_ASSERT(RF_IO_IS_R_OR_W(type));
109 if (ntfail > 2) {
110 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
111 /* *infoFunc = */ *createFunc = NULL;
112 return;
113 }
114 /* ok, we can do this I/O */
115 if (type == RF_IO_TYPE_READ) {
116 switch (ndfail) {
117 case 0:
118 /* fault free read */
119 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
120 break;
121 case 1:
122 /* lost a single data unit */
123 /* two cases: (1) parity is not lost. do a normal raid
124 * 5 reconstruct read. (2) parity is lost. do a
125 * reconstruct read using "q". */
126 if (ntfail == 2) { /* also lost redundancy */
127 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
128 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
129 else
130 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
131 } else {
132 /* P and Q are ok. But is there a failure in
133 * some unaccessed data unit? */
134 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
135 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
136 else
137 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
138 }
139 break;
140 case 2:
141 /* lost two data units */
142 /* *infoFunc = PQOneTwo; */
143 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
144 break;
145 }
146 return;
147 }
148 /* a write */
149 switch (ntfail) {
150 case 0: /* fault free */
151 if (rf_suppressLocksAndLargeWrites ||
152 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
153 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
154
155 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
156 } else {
157 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
158 }
159 break;
160
161 case 1: /* single disk fault */
162 if (npfail == 1) {
163 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
164 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
165 * normal mode raid5
166 * write. */
167 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
168 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
169 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
170 else
171 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
172 } else {/* parity died, small write only updating Q */
173 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
174 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
175 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
176 else
177 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
178 }
179 } else { /* data missing. Do a P reconstruct write if
180 * only a single data unit is lost in the
181 * stripe, otherwise a PQ reconstruct write. */
182 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
183 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
184 else
185 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
186 }
187 break;
188
189 case 2: /* two disk faults */
190 switch (npfail) {
191 case 2: /* both p and q dead */
192 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
193 break;
194 case 1: /* either p or q and dead data */
195 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
196 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
197 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
198 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
199 else
200 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
201 break;
202 case 0: /* double data loss */
203 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
204 break;
205 }
206 break;
207
208 default: /* more than 2 disk faults */
209 *createFunc = NULL;
210 RF_PANIC();
211 }
212 return;
213 }
/*
 * Stop-gap "info" functions.  They are compiled out; each one only reports
 * fixed successor/antecedent counts through nSucc and nAnte.
 */
#if 0
static void
PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
    RF_AccessStripeMap_t *asmap)
{
	*nAnte = 1;
	*nSucc = 1;
}

static void
PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
    RF_AccessStripeMap_t *asmap)
{
	*nSucc = 1;
	*nAnte = 2;
}
#endif
239
240 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
241 {
242 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
243 rf_RegularPQFunc, RF_FALSE);
244 }
245
246 int
247 rf_RegularONQFunc(node)
248 RF_DagNode_t *node;
249 {
250 int np = node->numParams;
251 int d;
252 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
253 int i;
254 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
255 RF_Etimer_t timer;
256 char *qbuf, *qpbuf;
257 char *obuf, *nbuf;
258 RF_PhysDiskAddr_t *old, *new;
259 unsigned long coeff;
260 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
261
262 RF_ETIMER_START(timer);
263
264 d = (np - 3) / 4;
265 RF_ASSERT(4 * d + 3 == np);
266 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
267 for (i = 0; i < d; i++) {
268 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
269 obuf = (char *) node->params[2 * i + 1].p;
270 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
271 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
272 RF_ASSERT(new->numSector == old->numSector);
273 RF_ASSERT(new->raidAddress == old->raidAddress);
274 /* the stripe unit within the stripe tells us the coefficient
275 * to use for the multiply. */
276 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
277 /* compute the data unit offset within the column, then add
278 * one */
279 coeff = (coeff % raidPtr->Layout.numDataCol);
280 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
281 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
282 }
283
284 RF_ETIMER_STOP(timer);
285 RF_ETIMER_EVAL(timer);
286 tracerec->q_us += RF_ETIMER_VAL_US(timer);
287 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
288 * I/O in this node */
289 return (0);
290 }
291 /*
292 See the SimpleXORFunc for the difference between a simple and regular func.
293 These Q functions should be used for
294
295 new q = Q(data,old data,old q)
296
297 style updates and not for
298
299 q = ( new data, new data, .... )
300
301 computations.
302
303 The simple q takes 2(2d+1)+1 params, where d is the number
304 of stripes written. The order of params is
305 old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
306 [2d] old q pda_0, old q buffer
307 [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d
308 raidPtr
309 */
310
311 int
312 rf_SimpleONQFunc(node)
313 RF_DagNode_t *node;
314 {
315 int np = node->numParams;
316 int d;
317 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
318 int i;
319 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
320 RF_Etimer_t timer;
321 char *qbuf;
322 char *obuf, *nbuf;
323 RF_PhysDiskAddr_t *old, *new;
324 unsigned long coeff;
325
326 RF_ETIMER_START(timer);
327
328 d = (np - 3) / 4;
329 RF_ASSERT(4 * d + 3 == np);
330 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
331 for (i = 0; i < d; i++) {
332 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
333 obuf = (char *) node->params[2 * i + 1].p;
334 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
335 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
336 RF_ASSERT(new->numSector == old->numSector);
337 RF_ASSERT(new->raidAddress == old->raidAddress);
338 /* the stripe unit within the stripe tells us the coefficient
339 * to use for the multiply. */
340 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
341 /* compute the data unit offset within the column, then add
342 * one */
343 coeff = (coeff % raidPtr->Layout.numDataCol);
344 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
345 }
346
347 RF_ETIMER_STOP(timer);
348 RF_ETIMER_EVAL(timer);
349 tracerec->q_us += RF_ETIMER_VAL_US(timer);
350 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
351 * I/O in this node */
352 return (0);
353 }
354 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
355 {
356 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
357 }
358
359 static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
360
361 static void
362 RegularQSubr(node, qbuf)
363 RF_DagNode_t *node;
364 char *qbuf;
365 {
366 int np = node->numParams;
367 int d;
368 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
369 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
370 int i;
371 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
372 RF_Etimer_t timer;
373 char *obuf, *qpbuf;
374 RF_PhysDiskAddr_t *old;
375 unsigned long coeff;
376
377 RF_ETIMER_START(timer);
378
379 d = (np - 1) / 2;
380 RF_ASSERT(2 * d + 1 == np);
381 for (i = 0; i < d; i++) {
382 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
383 obuf = (char *) node->params[2 * i + 1].p;
384 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
385 /* compute the data unit offset within the column, then add
386 * one */
387 coeff = (coeff % raidPtr->Layout.numDataCol);
388 /* the input buffers may not all be aligned with the start of
389 * the stripe. so shift by their sector offset within the
390 * stripe unit */
391 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
392 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
393 }
394
395 RF_ETIMER_STOP(timer);
396 RF_ETIMER_EVAL(timer);
397 tracerec->q_us += RF_ETIMER_VAL_US(timer);
398 }
399 /*
400 used in degraded writes.
401 */
402
403 static void DegrQSubr(RF_DagNode_t *node);
404
405 static void
406 DegrQSubr(node)
407 RF_DagNode_t *node;
408 {
409 int np = node->numParams;
410 int d;
411 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
412 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
413 int i;
414 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
415 RF_Etimer_t timer;
416 char *qbuf = node->results[1];
417 char *obuf, *qpbuf;
418 RF_PhysDiskAddr_t *old;
419 unsigned long coeff;
420 unsigned fail_start;
421 int j;
422
423 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
424 fail_start = old->startSector % secPerSU;
425
426 RF_ETIMER_START(timer);
427
428 d = (np - 2) / 2;
429 RF_ASSERT(2 * d + 2 == np);
430 for (i = 0; i < d; i++) {
431 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
432 obuf = (char *) node->params[2 * i + 1].p;
433 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
434 /* compute the data unit offset within the column, then add
435 * one */
436 coeff = (coeff % raidPtr->Layout.numDataCol);
437 /* the input buffers may not all be aligned with the start of
438 * the stripe. so shift by their sector offset within the
439 * stripe unit */
440 j = old->startSector % secPerSU;
441 RF_ASSERT(j >= fail_start);
442 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
443 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
444 }
445
446 RF_ETIMER_STOP(timer);
447 RF_ETIMER_EVAL(timer);
448 tracerec->q_us += RF_ETIMER_VAL_US(timer);
449 }
450 /*
451 Called by large write code to compute the new parity and the new q.
452
453 structure of the params:
454
455 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
456 raidPtr
457
458 for a total of 2d+1 arguments.
459 The result buffers results[0], results[1] are the buffers for the p and q,
460 respectively.
461
462 We compute Q first, then compute P. The P calculation may try to reuse
463 one of the input buffers for its output, so if we computed P first, we would
464 corrupt the input for the q calculation.
465 */
466
467 int
468 rf_RegularPQFunc(node)
469 RF_DagNode_t *node;
470 {
471 RegularQSubr(node, node->results[1]);
472 return (rf_RegularXorFunc(node)); /* does the wakeup */
473 }
474
475 int
476 rf_RegularQFunc(node)
477 RF_DagNode_t *node;
478 {
479 /* Almost ... adjust Qsubr args */
480 RegularQSubr(node, node->results[0]);
481 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
482 * I/O in this node */
483 return (0);
484 }
485 /*
486 Called by singly degraded write code to compute the new parity and the new q.
487
488 structure of the params:
489
490 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
491 failedPDA raidPtr
492
493 for a total of 2d+2 arguments.
494 The result buffers results[0], results[1] are the buffers for the parity and q,
495 respectively.
496
497 We compute Q first, then compute parity. The parity calculation may try to reuse
498 one of the input buffers for its output, so if we computed parity first, we would
499 corrupt the input for the q calculation.
500
501 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
502 */
503
504 void
505 rf_Degraded_100_PQFunc(node)
506 RF_DagNode_t *node;
507 {
508 int np = node->numParams;
509
510 RF_ASSERT(np >= 2);
511 DegrQSubr(node);
512 rf_RecoveryXorFunc(node);
513 }
514
515
/*
 * The two functions below are used when reading a stripe with a single lost
 * data unit.  The parameters are
 *
 *   pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
 *
 * and results[0] is the data buffer, which is originally zero-filled.
 */
525
526 /* this Q func is used by the degraded-mode dag functions to recover lost data.
527 * the second-to-last parameter is the PDA for the failed portion of the access.
528 * the code here looks at this PDA and assumes that the xor target buffer is
529 * equal in size to the number of sectors in the failed PDA. It then uses
530 * the other PDAs in the parameter list to determine where within the target
531 * buffer the corresponding data should be xored.
532 *
533 * Recall the basic equation is
534 *
535 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
536 *
537 * so to recover data_j we need
538 *
539 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
540 *
541 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
542 * copying Q into it. Then we need to do a table lookup to convert to solve
543 * data_j /= J
544 *
545 *
546 */
547 int
548 rf_RecoveryQFunc(node)
549 RF_DagNode_t *node;
550 {
551 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
552 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
553 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
554 int i;
555 RF_PhysDiskAddr_t *pda;
556 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
557 char *srcbuf, *destbuf;
558 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
559 RF_Etimer_t timer;
560 unsigned long coeff;
561
562 RF_ETIMER_START(timer);
563 /* start by copying Q into the buffer */
564 bcopy(node->params[node->numParams - 3].p, node->results[0],
565 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
566 for (i = 0; i < node->numParams - 4; i += 2) {
567 RF_ASSERT(node->params[i + 1].p != node->results[0]);
568 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
569 srcbuf = (char *) node->params[i + 1].p;
570 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
571 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
572 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
573 /* compute the data unit offset within the column */
574 coeff = (coeff % raidPtr->Layout.numDataCol);
575 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
576 }
577 /* Do the nasty inversion now */
578 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
579 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
580 RF_ETIMER_STOP(timer);
581 RF_ETIMER_EVAL(timer);
582 tracerec->q_us += RF_ETIMER_VAL_US(timer);
583 rf_GenericWakeupFunc(node, 0);
584 return (0);
585 }
586
587 int
588 rf_RecoveryPQFunc(node)
589 RF_DagNode_t *node;
590 {
591 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
592 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
593 return (1);
594 }
595 /*
596 Degraded write Q subroutine.
597 Used when P is dead.
598 Large-write style Q computation.
599 Parameters
600
601 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
602
603 We ignore failedPDA.
604
605 This is a "simple style" recovery func.
606 */
607
608 void
609 rf_PQ_DegradedWriteQFunc(node)
610 RF_DagNode_t *node;
611 {
612 int np = node->numParams;
613 int d;
614 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
615 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
616 int i;
617 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
618 RF_Etimer_t timer;
619 char *qbuf = node->results[0];
620 char *obuf, *qpbuf;
621 RF_PhysDiskAddr_t *old;
622 unsigned long coeff;
623 int fail_start, j;
624
625 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
626 fail_start = old->startSector % secPerSU;
627
628 RF_ETIMER_START(timer);
629
630 d = (np - 2) / 2;
631 RF_ASSERT(2 * d + 2 == np);
632
633 for (i = 0; i < d; i++) {
634 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
635 obuf = (char *) node->params[2 * i + 1].p;
636 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
637 /* compute the data unit offset within the column, then add
638 * one */
639 coeff = (coeff % raidPtr->Layout.numDataCol);
640 j = old->startSector % secPerSU;
641 RF_ASSERT(j >= fail_start);
642 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
643 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
644 }
645
646 RF_ETIMER_STOP(timer);
647 RF_ETIMER_EVAL(timer);
648 tracerec->q_us += RF_ETIMER_VAL_US(timer);
649 rf_GenericWakeupFunc(node, 0);
650 }
651
652
653
654
655 /* Q computations */
656
657 /*
658 coeff - colummn;
659
660 compute dest ^= qfor[28-coeff][rn[coeff+1] a]
661
662 on 5-bit basis;
663 length in bytes;
664 */
665
666 void
667 rf_IncQ(dest, buf, length, coeff)
668 unsigned long *dest;
669 unsigned long *buf;
670 unsigned length;
671 unsigned coeff;
672 {
673 unsigned long a, d, new;
674 unsigned long a1, a2;
675 unsigned int *q = &(rf_qfor[28 - coeff][0]);
676 unsigned r = rf_rn[coeff + 1];
677
678 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
679 #define INSERT(a,i) (a << (5L*i))
680
681 length /= 8;
682 /* 13 5 bit quants in a 64 bit word */
683 while (length) {
684 a = *buf++;
685 d = *dest;
686 a1 = EXTRACT(a, 0) ^ r;
687 a2 = EXTRACT(a, 1) ^ r;
688 new = INSERT(a2, 1) | a1;
689 a1 = EXTRACT(a, 2) ^ r;
690 a2 = EXTRACT(a, 3) ^ r;
691 a1 = q[a1];
692 a2 = q[a2];
693 new = new | INSERT(a1, 2) | INSERT(a2, 3);
694 a1 = EXTRACT(a, 4) ^ r;
695 a2 = EXTRACT(a, 5) ^ r;
696 a1 = q[a1];
697 a2 = q[a2];
698 new = new | INSERT(a1, 4) | INSERT(a2, 5);
699 a1 = EXTRACT(a, 5) ^ r;
700 a2 = EXTRACT(a, 6) ^ r;
701 a1 = q[a1];
702 a2 = q[a2];
703 new = new | INSERT(a1, 5) | INSERT(a2, 6);
704 #if RF_LONGSHIFT > 2
705 a1 = EXTRACT(a, 7) ^ r;
706 a2 = EXTRACT(a, 8) ^ r;
707 a1 = q[a1];
708 a2 = q[a2];
709 new = new | INSERT(a1, 7) | INSERT(a2, 8);
710 a1 = EXTRACT(a, 9) ^ r;
711 a2 = EXTRACT(a, 10) ^ r;
712 a1 = q[a1];
713 a2 = q[a2];
714 new = new | INSERT(a1, 9) | INSERT(a2, 10);
715 a1 = EXTRACT(a, 11) ^ r;
716 a2 = EXTRACT(a, 12) ^ r;
717 a1 = q[a1];
718 a2 = q[a2];
719 new = new | INSERT(a1, 11) | INSERT(a2, 12);
720 #endif /* RF_LONGSHIFT > 2 */
721 d ^= new;
722 *dest++ = d;
723 length--;
724 }
725 }
726 /*
727 compute
728
729 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
730
731 on a five bit basis.
732 optimization: compute old ^ new on 64 bit basis.
733
734 length in bytes.
735 */
736
737 static void
738 QDelta(
739 char *dest,
740 char *obuf,
741 char *nbuf,
742 unsigned length,
743 unsigned char coeff)
744 {
745 unsigned long a, d, new;
746 unsigned long a1, a2;
747 unsigned int *q = &(rf_qfor[28 - coeff][0]);
748 unsigned int r = rf_rn[coeff + 1];
749
750 r = a1 = a2 = new = d = a = 0; /* XXX for now... */
751 q = NULL; /* XXX for now */
752
753 #ifdef _KERNEL
754 /* PQ in kernel currently not supported because the encoding/decoding
755 * table is not present */
756 bzero(dest, length);
757 #else /* KERNEL */
758 /* this code probably doesn't work and should be rewritten -wvcii */
759 /* 13 5 bit quants in a 64 bit word */
760 length /= 8;
761 while (length) {
762 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
763 a ^= *nbuf++;
764 d = *dest;
765 a1 = EXTRACT(a, 0) ^ r;
766 a2 = EXTRACT(a, 1) ^ r;
767 a1 = q[a1];
768 a2 = q[a2];
769 new = INSERT(a2, 1) | a1;
770 a1 = EXTRACT(a, 2) ^ r;
771 a2 = EXTRACT(a, 3) ^ r;
772 a1 = q[a1];
773 a2 = q[a2];
774 new = new | INSERT(a1, 2) | INSERT(a2, 3);
775 a1 = EXTRACT(a, 4) ^ r;
776 a2 = EXTRACT(a, 5) ^ r;
777 a1 = q[a1];
778 a2 = q[a2];
779 new = new | INSERT(a1, 4) | INSERT(a2, 5);
780 a1 = EXTRACT(a, 5) ^ r;
781 a2 = EXTRACT(a, 6) ^ r;
782 a1 = q[a1];
783 a2 = q[a2];
784 new = new | INSERT(a1, 5) | INSERT(a2, 6);
785 #if RF_LONGSHIFT > 2
786 a1 = EXTRACT(a, 7) ^ r;
787 a2 = EXTRACT(a, 8) ^ r;
788 a1 = q[a1];
789 a2 = q[a2];
790 new = new | INSERT(a1, 7) | INSERT(a2, 8);
791 a1 = EXTRACT(a, 9) ^ r;
792 a2 = EXTRACT(a, 10) ^ r;
793 a1 = q[a1];
794 a2 = q[a2];
795 new = new | INSERT(a1, 9) | INSERT(a2, 10);
796 a1 = EXTRACT(a, 11) ^ r;
797 a2 = EXTRACT(a, 12) ^ r;
798 a1 = q[a1];
799 a2 = q[a2];
800 new = new | INSERT(a1, 11) | INSERT(a2, 12);
801 #endif /* RF_LONGSHIFT > 2 */
802 d ^= new;
803 *dest++ = d;
804 length--;
805 }
806 #endif /* _KERNEL */
807 }
808 /*
809 recover columns a and b from the given p and q into
810 bufs abuf and bbuf. All bufs are word aligned.
811 Length is in bytes.
812 */
813
814
815 /*
816 * XXX
817 *
818 * Everything about this seems wrong.
819 */
820 void
821 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
822 unsigned long *pbuf;
823 unsigned long *qbuf;
824 unsigned long *abuf;
825 unsigned long *bbuf;
826 unsigned length;
827 unsigned coeff_a;
828 unsigned coeff_b;
829 {
830 unsigned long p, q, a, a0, a1;
831 int col = (29 * coeff_a) + coeff_b;
832 unsigned char *q0 = &(rf_qinv[col][0]);
833
834 length /= 8;
835 while (length) {
836 p = *pbuf++;
837 q = *qbuf++;
838 a0 = EXTRACT(p, 0);
839 a1 = EXTRACT(q, 0);
840 a = q0[a0 << 5 | a1];
841 #define MF(i) \
842 a0 = EXTRACT(p,i); \
843 a1 = EXTRACT(q,i); \
844 a = a | INSERT(q0[a0<<5 | a1],i)
845
846 MF(1);
847 MF(2);
848 MF(3);
849 MF(4);
850 MF(5);
851 MF(6);
852 #if 0
853 MF(7);
854 MF(8);
855 MF(9);
856 MF(10);
857 MF(11);
858 MF(12);
859 #endif /* 0 */
860 *abuf++ = a;
861 *bbuf++ = a ^ p;
862 length--;
863 }
864 }
865 /*
866 Lost parity and a data column. Recover that data column.
867 Assume col coeff is lost. Let q the contents of Q after
868 all surviving data columns have been q-xored out of it.
869 Then we have the equation
870
871 q[28-coeff][a_i ^ r_i+1] = q
872
873 but q is cyclic with period 31.
874 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
875 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
876
877 so a_i = r_{coeff+1} ^ q[3+coeff][q]
878
879 The routine is passed q buffer and the buffer
880 the data is to be recoverd into. They can be the same.
881 */
882
883
884
885 static void
886 rf_InvertQ(
887 unsigned long *qbuf,
888 unsigned long *abuf,
889 unsigned length,
890 unsigned coeff)
891 {
892 unsigned long a, new;
893 unsigned long a1, a2;
894 unsigned int *q = &(rf_qfor[3 + coeff][0]);
895 unsigned r = rf_rn[coeff + 1];
896
897 /* 13 5 bit quants in a 64 bit word */
898 length /= 8;
899 while (length) {
900 a = *qbuf++;
901 a1 = EXTRACT(a, 0);
902 a2 = EXTRACT(a, 1);
903 a1 = r ^ q[a1];
904 a2 = r ^ q[a2];
905 new = INSERT(a2, 1) | a1;
906 #define M(i,j) \
907 a1 = EXTRACT(a,i); \
908 a2 = EXTRACT(a,j); \
909 a1 = r ^ q[a1]; \
910 a2 = r ^ q[a2]; \
911 new = new | INSERT(a1,i) | INSERT(a2,j)
912
913 M(2, 3);
914 M(4, 5);
915 M(5, 6);
916 #if RF_LONGSHIFT > 2
917 M(7, 8);
918 M(9, 10);
919 M(11, 12);
920 #endif /* RF_LONGSHIFT > 2 */
921 *abuf++ = new;
922 length--;
923 }
924 }
925 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
926 * (RF_INCLUDE_RAID6 > 0) */
927