rf_pq.c revision 1.7.6.3 1 /* $NetBSD: rf_pq.c,v 1.7.6.3 2001/10/22 20:41:38 nathanw Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * Code for RAID level 6 (P + Q) disk array architecture.
31 */
32
33 #include "rf_archs.h"
34
35 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
36
37 #include <dev/raidframe/raidframevar.h>
38
39 #include "rf_raid.h"
40 #include "rf_dag.h"
41 #include "rf_dagffrd.h"
42 #include "rf_dagffwr.h"
43 #include "rf_dagdegrd.h"
44 #include "rf_dagdegwr.h"
45 #include "rf_dagutils.h"
46 #include "rf_dagfuncs.h"
47 #include "rf_etimer.h"
48 #include "rf_pqdeg.h"
49 #include "rf_general.h"
50 #include "rf_map.h"
51 #include "rf_pq.h"
52
53 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
54 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
55
56 int
57 rf_RegularONPFunc(node)
58 RF_DagNode_t *node;
59 {
60 return (rf_RegularXorFunc(node));
61 }
62 /*
63 same as simpleONQ func, but the coefficient is always 1
64 */
65
66 int
67 rf_SimpleONPFunc(node)
68 RF_DagNode_t *node;
69 {
70 return (rf_SimpleXorFunc(node));
71 }
72
73 int
74 rf_RecoveryPFunc(node)
75 RF_DagNode_t *node;
76 {
77 return (rf_RecoveryXorFunc(node));
78 }
79
80 int
81 rf_RegularPFunc(node)
82 RF_DagNode_t *node;
83 {
84 return (rf_RegularXorFunc(node));
85 }
86 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
87 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
88
/* Prototypes for the file-local Q helpers. */
static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
    unsigned char coeff);
static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
    unsigned length, unsigned coeff);
95
96 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
97 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
98 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
99
100 void
101 rf_PQDagSelect(
102 RF_Raid_t * raidPtr,
103 RF_IoType_t type,
104 RF_AccessStripeMap_t * asmap,
105 RF_VoidFuncPtr * createFunc)
106 {
107 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
108 unsigned ndfail = asmap->numDataFailed;
109 unsigned npfail = asmap->numParityFailed;
110 unsigned ntfail = npfail + ndfail;
111
112 RF_ASSERT(RF_IO_IS_R_OR_W(type));
113 if (ntfail > 2) {
114 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
115 /* *infoFunc = */ *createFunc = NULL;
116 return;
117 }
118 /* ok, we can do this I/O */
119 if (type == RF_IO_TYPE_READ) {
120 switch (ndfail) {
121 case 0:
122 /* fault free read */
123 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
124 break;
125 case 1:
126 /* lost a single data unit */
127 /* two cases: (1) parity is not lost. do a normal raid
128 * 5 reconstruct read. (2) parity is lost. do a
129 * reconstruct read using "q". */
130 if (ntfail == 2) { /* also lost redundancy */
131 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
132 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
133 else
134 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
135 } else {
136 /* P and Q are ok. But is there a failure in
137 * some unaccessed data unit? */
138 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
139 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
140 else
141 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
142 }
143 break;
144 case 2:
145 /* lost two data units */
146 /* *infoFunc = PQOneTwo; */
147 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
148 break;
149 }
150 return;
151 }
152 /* a write */
153 switch (ntfail) {
154 case 0: /* fault free */
155 if (rf_suppressLocksAndLargeWrites ||
156 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
157 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
158
159 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
160 } else {
161 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
162 }
163 break;
164
165 case 1: /* single disk fault */
166 if (npfail == 1) {
167 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
168 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
169 * normal mode raid5
170 * write. */
171 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
172 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
173 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
174 else
175 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
176 } else {/* parity died, small write only updating Q */
177 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
178 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
179 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
180 else
181 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
182 }
183 } else { /* data missing. Do a P reconstruct write if
184 * only a single data unit is lost in the
185 * stripe, otherwise a PQ reconstruct write. */
186 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
187 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
188 else
189 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
190 }
191 break;
192
193 case 2: /* two disk faults */
194 switch (npfail) {
195 case 2: /* both p and q dead */
196 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
197 break;
198 case 1: /* either p or q and dead data */
199 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
200 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
201 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
202 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
203 else
204 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
205 break;
206 case 0: /* double data loss */
207 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
208 break;
209 }
210 break;
211
212 default: /* more than 2 disk faults */
213 *createFunc = NULL;
214 RF_PANIC();
215 }
216 return;
217 }
218 /*
219 Used as a stop gap info function
220 */
221 #if 0
222 static void
223 PQOne(raidPtr, nSucc, nAnte, asmap)
224 RF_Raid_t *raidPtr;
225 int *nSucc;
226 int *nAnte;
227 RF_AccessStripeMap_t *asmap;
228 {
229 *nSucc = *nAnte = 1;
230 }
231
232 static void
233 PQOneTwo(raidPtr, nSucc, nAnte, asmap)
234 RF_Raid_t *raidPtr;
235 int *nSucc;
236 int *nAnte;
237 RF_AccessStripeMap_t *asmap;
238 {
239 *nSucc = 1;
240 *nAnte = 2;
241 }
242 #endif
243
244 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
245 {
246 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
247 rf_RegularPQFunc, RF_FALSE);
248 }
249
250 int
251 rf_RegularONQFunc(node)
252 RF_DagNode_t *node;
253 {
254 int np = node->numParams;
255 int d;
256 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
257 int i;
258 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
259 RF_Etimer_t timer;
260 char *qbuf, *qpbuf;
261 char *obuf, *nbuf;
262 RF_PhysDiskAddr_t *old, *new;
263 unsigned long coeff;
264 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
265
266 RF_ETIMER_START(timer);
267
268 d = (np - 3) / 4;
269 RF_ASSERT(4 * d + 3 == np);
270 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
271 for (i = 0; i < d; i++) {
272 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
273 obuf = (char *) node->params[2 * i + 1].p;
274 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
275 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
276 RF_ASSERT(new->numSector == old->numSector);
277 RF_ASSERT(new->raidAddress == old->raidAddress);
278 /* the stripe unit within the stripe tells us the coefficient
279 * to use for the multiply. */
280 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
281 /* compute the data unit offset within the column, then add
282 * one */
283 coeff = (coeff % raidPtr->Layout.numDataCol);
284 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
285 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
286 }
287
288 RF_ETIMER_STOP(timer);
289 RF_ETIMER_EVAL(timer);
290 tracerec->q_us += RF_ETIMER_VAL_US(timer);
291 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
292 * I/O in this node */
293 return (0);
294 }
295 /*
296 See the SimpleXORFunc for the difference between a simple and regular func.
297 These Q functions should be used for
298
299 new q = Q(data,old data,old q)
300
301 style updates and not for
302
303 q = ( new data, new data, .... )
304
305 computations.
306
307 The simple q takes 2(2d+1)+1 params, where d is the number
308 of stripes written. The order of params is
309 old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
310 [2d] old q pda_0, old q buffer
311 [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d
312 raidPtr
313 */
314
315 int
316 rf_SimpleONQFunc(node)
317 RF_DagNode_t *node;
318 {
319 int np = node->numParams;
320 int d;
321 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
322 int i;
323 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
324 RF_Etimer_t timer;
325 char *qbuf;
326 char *obuf, *nbuf;
327 RF_PhysDiskAddr_t *old, *new;
328 unsigned long coeff;
329
330 RF_ETIMER_START(timer);
331
332 d = (np - 3) / 4;
333 RF_ASSERT(4 * d + 3 == np);
334 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
335 for (i = 0; i < d; i++) {
336 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
337 obuf = (char *) node->params[2 * i + 1].p;
338 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
339 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
340 RF_ASSERT(new->numSector == old->numSector);
341 RF_ASSERT(new->raidAddress == old->raidAddress);
342 /* the stripe unit within the stripe tells us the coefficient
343 * to use for the multiply. */
344 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
345 /* compute the data unit offset within the column, then add
346 * one */
347 coeff = (coeff % raidPtr->Layout.numDataCol);
348 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
349 }
350
351 RF_ETIMER_STOP(timer);
352 RF_ETIMER_EVAL(timer);
353 tracerec->q_us += RF_ETIMER_VAL_US(timer);
354 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
355 * I/O in this node */
356 return (0);
357 }
358 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
359 {
360 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
361 }
362
363 static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
364
365 static void
366 RegularQSubr(node, qbuf)
367 RF_DagNode_t *node;
368 char *qbuf;
369 {
370 int np = node->numParams;
371 int d;
372 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
373 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
374 int i;
375 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
376 RF_Etimer_t timer;
377 char *obuf, *qpbuf;
378 RF_PhysDiskAddr_t *old;
379 unsigned long coeff;
380
381 RF_ETIMER_START(timer);
382
383 d = (np - 1) / 2;
384 RF_ASSERT(2 * d + 1 == np);
385 for (i = 0; i < d; i++) {
386 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
387 obuf = (char *) node->params[2 * i + 1].p;
388 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
389 /* compute the data unit offset within the column, then add
390 * one */
391 coeff = (coeff % raidPtr->Layout.numDataCol);
392 /* the input buffers may not all be aligned with the start of
393 * the stripe. so shift by their sector offset within the
394 * stripe unit */
395 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
396 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
397 }
398
399 RF_ETIMER_STOP(timer);
400 RF_ETIMER_EVAL(timer);
401 tracerec->q_us += RF_ETIMER_VAL_US(timer);
402 }
403 /*
404 used in degraded writes.
405 */
406
407 static void DegrQSubr(RF_DagNode_t *node);
408
409 static void
410 DegrQSubr(node)
411 RF_DagNode_t *node;
412 {
413 int np = node->numParams;
414 int d;
415 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
416 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
417 int i;
418 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
419 RF_Etimer_t timer;
420 char *qbuf = node->results[1];
421 char *obuf, *qpbuf;
422 RF_PhysDiskAddr_t *old;
423 unsigned long coeff;
424 unsigned fail_start;
425 int j;
426
427 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
428 fail_start = old->startSector % secPerSU;
429
430 RF_ETIMER_START(timer);
431
432 d = (np - 2) / 2;
433 RF_ASSERT(2 * d + 2 == np);
434 for (i = 0; i < d; i++) {
435 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
436 obuf = (char *) node->params[2 * i + 1].p;
437 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
438 /* compute the data unit offset within the column, then add
439 * one */
440 coeff = (coeff % raidPtr->Layout.numDataCol);
441 /* the input buffers may not all be aligned with the start of
442 * the stripe. so shift by their sector offset within the
443 * stripe unit */
444 j = old->startSector % secPerSU;
445 RF_ASSERT(j >= fail_start);
446 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
447 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
448 }
449
450 RF_ETIMER_STOP(timer);
451 RF_ETIMER_EVAL(timer);
452 tracerec->q_us += RF_ETIMER_VAL_US(timer);
453 }
454 /*
455 Called by large write code to compute the new parity and the new q.
456
457 structure of the params:
458
459 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
460 raidPtr
461
462 for a total of 2d+1 arguments.
463 The result buffers results[0], results[1] are the buffers for the p and q,
464 respectively.
465
466 We compute Q first, then compute P. The P calculation may try to reuse
467 one of the input buffers for its output, so if we computed P first, we would
468 corrupt the input for the q calculation.
469 */
470
471 int
472 rf_RegularPQFunc(node)
473 RF_DagNode_t *node;
474 {
475 RegularQSubr(node, node->results[1]);
476 return (rf_RegularXorFunc(node)); /* does the wakeup */
477 }
478
479 int
480 rf_RegularQFunc(node)
481 RF_DagNode_t *node;
482 {
483 /* Almost ... adjust Qsubr args */
484 RegularQSubr(node, node->results[0]);
485 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
486 * I/O in this node */
487 return (0);
488 }
489 /*
490 Called by singly degraded write code to compute the new parity and the new q.
491
492 structure of the params:
493
494 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
495 failedPDA raidPtr
496
497 for a total of 2d+2 arguments.
498 The result buffers results[0], results[1] are the buffers for the parity and q,
499 respectively.
500
501 We compute Q first, then compute parity. The parity calculation may try to reuse
502 one of the input buffers for its output, so if we computed parity first, we would
503 corrupt the input for the q calculation.
504
505 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
506 */
507
508 void
509 rf_Degraded_100_PQFunc(node)
510 RF_DagNode_t *node;
511 {
512 int np = node->numParams;
513
514 RF_ASSERT(np >= 2);
515 DegrQSubr(node);
516 rf_RecoveryXorFunc(node);
517 }
518
519
/*
   The two below are used when reading a stripe with a single lost data unit.
   The parameters are

   pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr

   and results[0] contains the data buffer, which is originally zero-filled.

*/
529
530 /* this Q func is used by the degraded-mode dag functions to recover lost data.
531 * the second-to-last parameter is the PDA for the failed portion of the access.
532 * the code here looks at this PDA and assumes that the xor target buffer is
533 * equal in size to the number of sectors in the failed PDA. It then uses
534 * the other PDAs in the parameter list to determine where within the target
535 * buffer the corresponding data should be xored.
536 *
537 * Recall the basic equation is
538 *
539 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
540 *
541 * so to recover data_j we need
542 *
543 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
544 *
545 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
546 * copying Q into it. Then we need to do a table lookup to convert to solve
547 * data_j /= J
548 *
549 *
550 */
551 int
552 rf_RecoveryQFunc(node)
553 RF_DagNode_t *node;
554 {
555 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
556 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
557 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
558 int i;
559 RF_PhysDiskAddr_t *pda;
560 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
561 char *srcbuf, *destbuf;
562 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
563 RF_Etimer_t timer;
564 unsigned long coeff;
565
566 RF_ETIMER_START(timer);
567 /* start by copying Q into the buffer */
568 bcopy(node->params[node->numParams - 3].p, node->results[0],
569 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
570 for (i = 0; i < node->numParams - 4; i += 2) {
571 RF_ASSERT(node->params[i + 1].p != node->results[0]);
572 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
573 srcbuf = (char *) node->params[i + 1].p;
574 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
575 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
576 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
577 /* compute the data unit offset within the column */
578 coeff = (coeff % raidPtr->Layout.numDataCol);
579 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
580 }
581 /* Do the nasty inversion now */
582 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
583 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
584 RF_ETIMER_STOP(timer);
585 RF_ETIMER_EVAL(timer);
586 tracerec->q_us += RF_ETIMER_VAL_US(timer);
587 rf_GenericWakeupFunc(node, 0);
588 return (0);
589 }
590
591 int
592 rf_RecoveryPQFunc(node)
593 RF_DagNode_t *node;
594 {
595 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
596 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
597 return (1);
598 }
599 /*
600 Degraded write Q subroutine.
601 Used when P is dead.
602 Large-write style Q computation.
603 Parameters
604
605 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
606
607 We ignore failedPDA.
608
609 This is a "simple style" recovery func.
610 */
611
612 void
613 rf_PQ_DegradedWriteQFunc(node)
614 RF_DagNode_t *node;
615 {
616 int np = node->numParams;
617 int d;
618 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
619 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
620 int i;
621 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
622 RF_Etimer_t timer;
623 char *qbuf = node->results[0];
624 char *obuf, *qpbuf;
625 RF_PhysDiskAddr_t *old;
626 unsigned long coeff;
627 int fail_start, j;
628
629 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
630 fail_start = old->startSector % secPerSU;
631
632 RF_ETIMER_START(timer);
633
634 d = (np - 2) / 2;
635 RF_ASSERT(2 * d + 2 == np);
636
637 for (i = 0; i < d; i++) {
638 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
639 obuf = (char *) node->params[2 * i + 1].p;
640 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
641 /* compute the data unit offset within the column, then add
642 * one */
643 coeff = (coeff % raidPtr->Layout.numDataCol);
644 j = old->startSector % secPerSU;
645 RF_ASSERT(j >= fail_start);
646 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
647 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
648 }
649
650 RF_ETIMER_STOP(timer);
651 RF_ETIMER_EVAL(timer);
652 tracerec->q_us += RF_ETIMER_VAL_US(timer);
653 rf_GenericWakeupFunc(node, 0);
654 }
655
656
657
658
659 /* Q computations */
660
661 /*
662 coeff - colummn;
663
664 compute dest ^= qfor[28-coeff][rn[coeff+1] a]
665
666 on 5-bit basis;
667 length in bytes;
668 */
669
670 void
671 rf_IncQ(dest, buf, length, coeff)
672 unsigned long *dest;
673 unsigned long *buf;
674 unsigned length;
675 unsigned coeff;
676 {
677 unsigned long a, d, new;
678 unsigned long a1, a2;
679 unsigned int *q = &(rf_qfor[28 - coeff][0]);
680 unsigned r = rf_rn[coeff + 1];
681
682 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
683 #define INSERT(a,i) (a << (5L*i))
684
685 length /= 8;
686 /* 13 5 bit quants in a 64 bit word */
687 while (length) {
688 a = *buf++;
689 d = *dest;
690 a1 = EXTRACT(a, 0) ^ r;
691 a2 = EXTRACT(a, 1) ^ r;
692 new = INSERT(a2, 1) | a1;
693 a1 = EXTRACT(a, 2) ^ r;
694 a2 = EXTRACT(a, 3) ^ r;
695 a1 = q[a1];
696 a2 = q[a2];
697 new = new | INSERT(a1, 2) | INSERT(a2, 3);
698 a1 = EXTRACT(a, 4) ^ r;
699 a2 = EXTRACT(a, 5) ^ r;
700 a1 = q[a1];
701 a2 = q[a2];
702 new = new | INSERT(a1, 4) | INSERT(a2, 5);
703 a1 = EXTRACT(a, 5) ^ r;
704 a2 = EXTRACT(a, 6) ^ r;
705 a1 = q[a1];
706 a2 = q[a2];
707 new = new | INSERT(a1, 5) | INSERT(a2, 6);
708 #if RF_LONGSHIFT > 2
709 a1 = EXTRACT(a, 7) ^ r;
710 a2 = EXTRACT(a, 8) ^ r;
711 a1 = q[a1];
712 a2 = q[a2];
713 new = new | INSERT(a1, 7) | INSERT(a2, 8);
714 a1 = EXTRACT(a, 9) ^ r;
715 a2 = EXTRACT(a, 10) ^ r;
716 a1 = q[a1];
717 a2 = q[a2];
718 new = new | INSERT(a1, 9) | INSERT(a2, 10);
719 a1 = EXTRACT(a, 11) ^ r;
720 a2 = EXTRACT(a, 12) ^ r;
721 a1 = q[a1];
722 a2 = q[a2];
723 new = new | INSERT(a1, 11) | INSERT(a2, 12);
724 #endif /* RF_LONGSHIFT > 2 */
725 d ^= new;
726 *dest++ = d;
727 length--;
728 }
729 }
730 /*
731 compute
732
733 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
734
735 on a five bit basis.
736 optimization: compute old ^ new on 64 bit basis.
737
738 length in bytes.
739 */
740
741 static void
742 QDelta(
743 char *dest,
744 char *obuf,
745 char *nbuf,
746 unsigned length,
747 unsigned char coeff)
748 {
749 unsigned long a, d, new;
750 unsigned long a1, a2;
751 unsigned int *q = &(rf_qfor[28 - coeff][0]);
752 unsigned int r = rf_rn[coeff + 1];
753
754 r = a1 = a2 = new = d = a = 0; /* XXX for now... */
755 q = NULL; /* XXX for now */
756
757 #ifdef _KERNEL
758 /* PQ in kernel currently not supported because the encoding/decoding
759 * table is not present */
760 memset(dest, 0, length);
761 #else /* KERNEL */
762 /* this code probably doesn't work and should be rewritten -wvcii */
763 /* 13 5 bit quants in a 64 bit word */
764 length /= 8;
765 while (length) {
766 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
767 a ^= *nbuf++;
768 d = *dest;
769 a1 = EXTRACT(a, 0) ^ r;
770 a2 = EXTRACT(a, 1) ^ r;
771 a1 = q[a1];
772 a2 = q[a2];
773 new = INSERT(a2, 1) | a1;
774 a1 = EXTRACT(a, 2) ^ r;
775 a2 = EXTRACT(a, 3) ^ r;
776 a1 = q[a1];
777 a2 = q[a2];
778 new = new | INSERT(a1, 2) | INSERT(a2, 3);
779 a1 = EXTRACT(a, 4) ^ r;
780 a2 = EXTRACT(a, 5) ^ r;
781 a1 = q[a1];
782 a2 = q[a2];
783 new = new | INSERT(a1, 4) | INSERT(a2, 5);
784 a1 = EXTRACT(a, 5) ^ r;
785 a2 = EXTRACT(a, 6) ^ r;
786 a1 = q[a1];
787 a2 = q[a2];
788 new = new | INSERT(a1, 5) | INSERT(a2, 6);
789 #if RF_LONGSHIFT > 2
790 a1 = EXTRACT(a, 7) ^ r;
791 a2 = EXTRACT(a, 8) ^ r;
792 a1 = q[a1];
793 a2 = q[a2];
794 new = new | INSERT(a1, 7) | INSERT(a2, 8);
795 a1 = EXTRACT(a, 9) ^ r;
796 a2 = EXTRACT(a, 10) ^ r;
797 a1 = q[a1];
798 a2 = q[a2];
799 new = new | INSERT(a1, 9) | INSERT(a2, 10);
800 a1 = EXTRACT(a, 11) ^ r;
801 a2 = EXTRACT(a, 12) ^ r;
802 a1 = q[a1];
803 a2 = q[a2];
804 new = new | INSERT(a1, 11) | INSERT(a2, 12);
805 #endif /* RF_LONGSHIFT > 2 */
806 d ^= new;
807 *dest++ = d;
808 length--;
809 }
810 #endif /* _KERNEL */
811 }
812 /*
813 recover columns a and b from the given p and q into
814 bufs abuf and bbuf. All bufs are word aligned.
815 Length is in bytes.
816 */
817
818
819 /*
820 * XXX
821 *
822 * Everything about this seems wrong.
823 */
824 void
825 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
826 unsigned long *pbuf;
827 unsigned long *qbuf;
828 unsigned long *abuf;
829 unsigned long *bbuf;
830 unsigned length;
831 unsigned coeff_a;
832 unsigned coeff_b;
833 {
834 unsigned long p, q, a, a0, a1;
835 int col = (29 * coeff_a) + coeff_b;
836 unsigned char *q0 = &(rf_qinv[col][0]);
837
838 length /= 8;
839 while (length) {
840 p = *pbuf++;
841 q = *qbuf++;
842 a0 = EXTRACT(p, 0);
843 a1 = EXTRACT(q, 0);
844 a = q0[a0 << 5 | a1];
845 #define MF(i) \
846 a0 = EXTRACT(p,i); \
847 a1 = EXTRACT(q,i); \
848 a = a | INSERT(q0[a0<<5 | a1],i)
849
850 MF(1);
851 MF(2);
852 MF(3);
853 MF(4);
854 MF(5);
855 MF(6);
856 #if 0
857 MF(7);
858 MF(8);
859 MF(9);
860 MF(10);
861 MF(11);
862 MF(12);
863 #endif /* 0 */
864 *abuf++ = a;
865 *bbuf++ = a ^ p;
866 length--;
867 }
868 }
869 /*
870 Lost parity and a data column. Recover that data column.
871 Assume col coeff is lost. Let q the contents of Q after
872 all surviving data columns have been q-xored out of it.
873 Then we have the equation
874
875 q[28-coeff][a_i ^ r_i+1] = q
876
877 but q is cyclic with period 31.
878 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
879 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
880
881 so a_i = r_{coeff+1} ^ q[3+coeff][q]
882
883 The routine is passed q buffer and the buffer
884 the data is to be recoverd into. They can be the same.
885 */
886
887
888
889 static void
890 rf_InvertQ(
891 unsigned long *qbuf,
892 unsigned long *abuf,
893 unsigned length,
894 unsigned coeff)
895 {
896 unsigned long a, new;
897 unsigned long a1, a2;
898 unsigned int *q = &(rf_qfor[3 + coeff][0]);
899 unsigned r = rf_rn[coeff + 1];
900
901 /* 13 5 bit quants in a 64 bit word */
902 length /= 8;
903 while (length) {
904 a = *qbuf++;
905 a1 = EXTRACT(a, 0);
906 a2 = EXTRACT(a, 1);
907 a1 = r ^ q[a1];
908 a2 = r ^ q[a2];
909 new = INSERT(a2, 1) | a1;
910 #define M(i,j) \
911 a1 = EXTRACT(a,i); \
912 a2 = EXTRACT(a,j); \
913 a1 = r ^ q[a1]; \
914 a2 = r ^ q[a2]; \
915 new = new | INSERT(a1,i) | INSERT(a2,j)
916
917 M(2, 3);
918 M(4, 5);
919 M(5, 6);
920 #if RF_LONGSHIFT > 2
921 M(7, 8);
922 M(9, 10);
923 M(11, 12);
924 #endif /* RF_LONGSHIFT > 2 */
925 *abuf++ = new;
926 length--;
927 }
928 }
929 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
930 * (RF_INCLUDE_RAID6 > 0) */
931