rf_pq.c revision 1.5 1 /* $NetBSD: rf_pq.c,v 1.5 1999/08/15 02:36:40 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * Code for RAID level 6 (P + Q) disk array architecture.
31 */
32
33 #include "rf_archs.h"
34 #include "rf_types.h"
35 #include "rf_raid.h"
36 #include "rf_dag.h"
37 #include "rf_dagffrd.h"
38 #include "rf_dagffwr.h"
39 #include "rf_dagdegrd.h"
40 #include "rf_dagdegwr.h"
41 #include "rf_dagutils.h"
42 #include "rf_dagfuncs.h"
43 #include "rf_threadid.h"
44 #include "rf_etimer.h"
45 #include "rf_pqdeg.h"
46 #include "rf_general.h"
47 #include "rf_map.h"
48 #include "rf_pq.h"
49
50 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
51 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
52
53 int
54 rf_RegularONPFunc(node)
55 RF_DagNode_t *node;
56 {
57 return (rf_RegularXorFunc(node));
58 }
59 /*
60 same as simpleONQ func, but the coefficient is always 1
61 */
62
63 int
64 rf_SimpleONPFunc(node)
65 RF_DagNode_t *node;
66 {
67 return (rf_SimpleXorFunc(node));
68 }
69
70 int
71 rf_RecoveryPFunc(node)
72 RF_DagNode_t *node;
73 {
74 return (rf_RecoveryXorFunc(node));
75 }
76
77 int
78 rf_RegularPFunc(node)
79 RF_DagNode_t *node;
80 {
81 return (rf_RegularXorFunc(node));
82 }
83 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
84
85 static void
86 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
87 unsigned char coeff);
88 static void
89 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
90 unsigned length, unsigned coeff);
91
92 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
93 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
94 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
95
96 void
97 rf_PQDagSelect(
98 RF_Raid_t * raidPtr,
99 RF_IoType_t type,
100 RF_AccessStripeMap_t * asmap,
101 RF_VoidFuncPtr * createFunc)
102 {
103 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
104 unsigned ndfail = asmap->numDataFailed;
105 unsigned npfail = asmap->numParityFailed;
106 unsigned ntfail = npfail + ndfail;
107
108 RF_ASSERT(RF_IO_IS_R_OR_W(type));
109 if (ntfail > 2) {
110 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
111 /* *infoFunc = */ *createFunc = NULL;
112 return;
113 }
114 /* ok, we can do this I/O */
115 if (type == RF_IO_TYPE_READ) {
116 switch (ndfail) {
117 case 0:
118 /* fault free read */
119 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
120 break;
121 case 1:
122 /* lost a single data unit */
123 /* two cases: (1) parity is not lost. do a normal raid
124 * 5 reconstruct read. (2) parity is lost. do a
125 * reconstruct read using "q". */
126 if (ntfail == 2) { /* also lost redundancy */
127 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
128 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
129 else
130 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
131 } else {
132 /* P and Q are ok. But is there a failure in
133 * some unaccessed data unit? */
134 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
135 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
136 else
137 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
138 }
139 break;
140 case 2:
141 /* lost two data units */
142 /* *infoFunc = PQOneTwo; */
143 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
144 break;
145 }
146 return;
147 }
148 /* a write */
149 switch (ntfail) {
150 case 0: /* fault free */
151 if (rf_suppressLocksAndLargeWrites ||
152 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
153 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
154
155 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
156 } else {
157 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
158 }
159 break;
160
161 case 1: /* single disk fault */
162 if (npfail == 1) {
163 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
164 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
165 * normal mode raid5
166 * write. */
167 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
168 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
169 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
170 else
171 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
172 } else {/* parity died, small write only updating Q */
173 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
174 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
175 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
176 else
177 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
178 }
179 } else { /* data missing. Do a P reconstruct write if
180 * only a single data unit is lost in the
181 * stripe, otherwise a PQ reconstruct write. */
182 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
183 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
184 else
185 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
186 }
187 break;
188
189 case 2: /* two disk faults */
190 switch (npfail) {
191 case 2: /* both p and q dead */
192 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
193 break;
194 case 1: /* either p or q and dead data */
195 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
196 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
197 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
198 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
199 else
200 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
201 break;
202 case 0: /* double data loss */
203 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
204 break;
205 }
206 break;
207
208 default: /* more than 2 disk faults */
209 *createFunc = NULL;
210 RF_PANIC();
211 }
212 return;
213 }
214 /*
215 Used as a stop gap info function
216 */
217 #if 0
218 static void
219 PQOne(raidPtr, nSucc, nAnte, asmap)
220 RF_Raid_t *raidPtr;
221 int *nSucc;
222 int *nAnte;
223 RF_AccessStripeMap_t *asmap;
224 {
225 *nSucc = *nAnte = 1;
226 }
227
228 static void
229 PQOneTwo(raidPtr, nSucc, nAnte, asmap)
230 RF_Raid_t *raidPtr;
231 int *nSucc;
232 int *nAnte;
233 RF_AccessStripeMap_t *asmap;
234 {
235 *nSucc = 1;
236 *nAnte = 2;
237 }
238 #endif
239
240 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
241 {
242 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
243 rf_RegularPQFunc, RF_FALSE);
244 }
245
246 int
247 rf_RegularONQFunc(node)
248 RF_DagNode_t *node;
249 {
250 int np = node->numParams;
251 int d;
252 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
253 int i;
254 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
255 RF_Etimer_t timer;
256 char *qbuf, *qpbuf;
257 char *obuf, *nbuf;
258 RF_PhysDiskAddr_t *old, *new;
259 unsigned long coeff;
260 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
261
262 RF_ETIMER_START(timer);
263
264 d = (np - 3) / 4;
265 RF_ASSERT(4 * d + 3 == np);
266 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
267 for (i = 0; i < d; i++) {
268 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
269 obuf = (char *) node->params[2 * i + 1].p;
270 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
271 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
272 RF_ASSERT(new->numSector == old->numSector);
273 RF_ASSERT(new->raidAddress == old->raidAddress);
274 /* the stripe unit within the stripe tells us the coefficient
275 * to use for the multiply. */
276 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
277 /* compute the data unit offset within the column, then add
278 * one */
279 coeff = (coeff % raidPtr->Layout.numDataCol);
280 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
281 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
282 }
283
284 RF_ETIMER_STOP(timer);
285 RF_ETIMER_EVAL(timer);
286 tracerec->q_us += RF_ETIMER_VAL_US(timer);
287 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
288 * I/O in this node */
289 return (0);
290 }
/*
   See the SimpleXORFunc for the difference between a simple and regular func.
   These Q functions should be used for

         new q = Q(data, old data, old q)

   style updates and not for

         q = ( new data, new data, .... )

   computations.

   The simple q takes 2(2d+1)+1 params, where d is the number of
   old/new data buffer pairs.  The order of params is
   [0]    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_(d-1), old data buffer_(d-1)
   [2d]   old q pda_0, old q buffer
   [2d+2] new data pda_0, new data buffer_0, ... new data pda_(d-1), new data buffer_(d-1)
   [4d+2] raidPtr
*/
310
311 int
312 rf_SimpleONQFunc(node)
313 RF_DagNode_t *node;
314 {
315 int np = node->numParams;
316 int d;
317 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
318 int i;
319 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
320 RF_Etimer_t timer;
321 char *qbuf;
322 char *obuf, *nbuf;
323 RF_PhysDiskAddr_t *old, *new;
324 unsigned long coeff;
325
326 RF_ETIMER_START(timer);
327
328 d = (np - 3) / 4;
329 RF_ASSERT(4 * d + 3 == np);
330 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
331 for (i = 0; i < d; i++) {
332 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
333 obuf = (char *) node->params[2 * i + 1].p;
334 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
335 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
336 RF_ASSERT(new->numSector == old->numSector);
337 RF_ASSERT(new->raidAddress == old->raidAddress);
338 /* the stripe unit within the stripe tells us the coefficient
339 * to use for the multiply. */
340 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
341 /* compute the data unit offset within the column, then add
342 * one */
343 coeff = (coeff % raidPtr->Layout.numDataCol);
344 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
345 }
346
347 RF_ETIMER_STOP(timer);
348 RF_ETIMER_EVAL(timer);
349 tracerec->q_us += RF_ETIMER_VAL_US(timer);
350 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
351 * I/O in this node */
352 return (0);
353 }
354 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
355 {
356 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
357 }
358
359 static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
360
361 static void
362 RegularQSubr(node, qbuf)
363 RF_DagNode_t *node;
364 char *qbuf;
365 {
366 int np = node->numParams;
367 int d;
368 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
369 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
370 int i;
371 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
372 RF_Etimer_t timer;
373 char *obuf, *qpbuf;
374 RF_PhysDiskAddr_t *old;
375 unsigned long coeff;
376
377 RF_ETIMER_START(timer);
378
379 d = (np - 1) / 2;
380 RF_ASSERT(2 * d + 1 == np);
381 for (i = 0; i < d; i++) {
382 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
383 obuf = (char *) node->params[2 * i + 1].p;
384 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
385 /* compute the data unit offset within the column, then add
386 * one */
387 coeff = (coeff % raidPtr->Layout.numDataCol);
388 /* the input buffers may not all be aligned with the start of
389 * the stripe. so shift by their sector offset within the
390 * stripe unit */
391 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
392 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
393 }
394
395 RF_ETIMER_STOP(timer);
396 RF_ETIMER_EVAL(timer);
397 tracerec->q_us += RF_ETIMER_VAL_US(timer);
398 }
399 /*
400 used in degraded writes.
401 */
402
403 static void DegrQSubr(RF_DagNode_t *node);
404
405 static void
406 DegrQSubr(node)
407 RF_DagNode_t *node;
408 {
409 int np = node->numParams;
410 int d;
411 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
412 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
413 int i;
414 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
415 RF_Etimer_t timer;
416 char *qbuf = node->results[1];
417 char *obuf, *qpbuf;
418 RF_PhysDiskAddr_t *old;
419 unsigned long coeff;
420 unsigned fail_start;
421 int j;
422
423 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
424 fail_start = old->startSector % secPerSU;
425
426 RF_ETIMER_START(timer);
427
428 d = (np - 2) / 2;
429 RF_ASSERT(2 * d + 2 == np);
430 for (i = 0; i < d; i++) {
431 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
432 obuf = (char *) node->params[2 * i + 1].p;
433 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
434 /* compute the data unit offset within the column, then add
435 * one */
436 coeff = (coeff % raidPtr->Layout.numDataCol);
437 /* the input buffers may not all be aligned with the start of
438 * the stripe. so shift by their sector offset within the
439 * stripe unit */
440 j = old->startSector % secPerSU;
441 RF_ASSERT(j >= fail_start);
442 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
443 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
444 }
445
446 RF_ETIMER_STOP(timer);
447 RF_ETIMER_EVAL(timer);
448 tracerec->q_us += RF_ETIMER_VAL_US(timer);
449 }
/*
   Called by large write code to compute the new parity and the new q.

   structure of the params:

   pda_0, buffer_0, pda_1, buffer_1, ... , pda_(d-1), buffer_(d-1)   (d = numDataCol)
   raidPtr

   for a total of 2d+1 arguments.
   The result buffers results[0], results[1] are the buffers for the p and q,
   respectively.

   We compute Q first, then compute P.  The P calculation may try to reuse
   one of the input buffers for its output, so if we computed P first, we would
   corrupt the input for the q calculation.
*/
466
467 int
468 rf_RegularPQFunc(node)
469 RF_DagNode_t *node;
470 {
471 RegularQSubr(node, node->results[1]);
472 return (rf_RegularXorFunc(node)); /* does the wakeup */
473 }
474
475 int
476 rf_RegularQFunc(node)
477 RF_DagNode_t *node;
478 {
479 /* Almost ... adjust Qsubr args */
480 RegularQSubr(node, node->results[0]);
481 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
482 * I/O in this node */
483 return (0);
484 }
485 /*
486 Called by singly degraded write code to compute the new parity and the new q.
487
488 structure of the params:
489
490 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
491 failedPDA raidPtr
492
493 for a total of 2d+2 arguments.
494 The result buffers results[0], results[1] are the buffers for the parity and q,
495 respectively.
496
497 We compute Q first, then compute parity. The parity calculation may try to reuse
498 one of the input buffers for its output, so if we computed parity first, we would
499 corrupt the input for the q calculation.
500
501 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
502 */
503
504 void
505 rf_Degraded_100_PQFunc(node)
506 RF_DagNode_t *node;
507 {
508 int np = node->numParams;
509
510 RF_ASSERT(np >= 2);
511 DegrQSubr(node);
512 rf_RecoveryXorFunc(node);
513 }
514
515
516 /*
517 The two below are used when reading a stripe with a single lost data unit.
518 The parameters are
519
520 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
521
522 and results[0] contains the data buffer. Which is originally zero-filled.
523
524 */
525
526 /* this Q func is used by the degraded-mode dag functions to recover lost data.
527 * the second-to-last parameter is the PDA for the failed portion of the access.
528 * the code here looks at this PDA and assumes that the xor target buffer is
529 * equal in size to the number of sectors in the failed PDA. It then uses
530 * the other PDAs in the parameter list to determine where within the target
531 * buffer the corresponding data should be xored.
532 *
533 * Recall the basic equation is
534 *
535 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
536 *
537 * so to recover data_j we need
538 *
539 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
540 *
541 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
542 * copying Q into it. Then we need to do a table lookup to convert to solve
543 * data_j /= J
544 *
545 *
546 */
547 int
548 rf_RecoveryQFunc(node)
549 RF_DagNode_t *node;
550 {
551 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
552 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
553 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
554 int i;
555 RF_PhysDiskAddr_t *pda;
556 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
557 char *srcbuf, *destbuf;
558 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
559 RF_Etimer_t timer;
560 unsigned long coeff;
561
562 RF_ETIMER_START(timer);
563 /* start by copying Q into the buffer */
564 bcopy(node->params[node->numParams - 3].p, node->results[0],
565 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
566 for (i = 0; i < node->numParams - 4; i += 2) {
567 RF_ASSERT(node->params[i + 1].p != node->results[0]);
568 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
569 srcbuf = (char *) node->params[i + 1].p;
570 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
571 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
572 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
573 /* compute the data unit offset within the column */
574 coeff = (coeff % raidPtr->Layout.numDataCol);
575 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
576 }
577 /* Do the nasty inversion now */
578 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
579 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
580 RF_ETIMER_STOP(timer);
581 RF_ETIMER_EVAL(timer);
582 tracerec->q_us += RF_ETIMER_VAL_US(timer);
583 rf_GenericWakeupFunc(node, 0);
584 return (0);
585 }
586
587 int
588 rf_RecoveryPQFunc(node)
589 RF_DagNode_t *node;
590 {
591 RF_PANIC();
592 return (1);
593 }
594 /*
595 Degraded write Q subroutine.
596 Used when P is dead.
597 Large-write style Q computation.
598 Parameters
599
600 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
601
602 We ignore failedPDA.
603
604 This is a "simple style" recovery func.
605 */
606
607 void
608 rf_PQ_DegradedWriteQFunc(node)
609 RF_DagNode_t *node;
610 {
611 int np = node->numParams;
612 int d;
613 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
614 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
615 int i;
616 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
617 RF_Etimer_t timer;
618 char *qbuf = node->results[0];
619 char *obuf, *qpbuf;
620 RF_PhysDiskAddr_t *old;
621 unsigned long coeff;
622 int fail_start, j;
623
624 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
625 fail_start = old->startSector % secPerSU;
626
627 RF_ETIMER_START(timer);
628
629 d = (np - 2) / 2;
630 RF_ASSERT(2 * d + 2 == np);
631
632 for (i = 0; i < d; i++) {
633 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
634 obuf = (char *) node->params[2 * i + 1].p;
635 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
636 /* compute the data unit offset within the column, then add
637 * one */
638 coeff = (coeff % raidPtr->Layout.numDataCol);
639 j = old->startSector % secPerSU;
640 RF_ASSERT(j >= fail_start);
641 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
642 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
643 }
644
645 RF_ETIMER_STOP(timer);
646 RF_ETIMER_EVAL(timer);
647 tracerec->q_us += RF_ETIMER_VAL_US(timer);
648 rf_GenericWakeupFunc(node, 0);
649 }
650
651
652
653
654 /* Q computations */
655
/*
   coeff - column;

   compute  dest ^= qfor[28-coeff][rn[coeff+1] a]

   on 5-bit basis;
   length in bytes;
*/
664
665 void
666 rf_IncQ(dest, buf, length, coeff)
667 unsigned long *dest;
668 unsigned long *buf;
669 unsigned length;
670 unsigned coeff;
671 {
672 unsigned long a, d, new;
673 unsigned long a1, a2;
674 unsigned int *q = &(rf_qfor[28 - coeff][0]);
675 unsigned r = rf_rn[coeff + 1];
676
677 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
678 #define INSERT(a,i) (a << (5L*i))
679
680 length /= 8;
681 /* 13 5 bit quants in a 64 bit word */
682 while (length) {
683 a = *buf++;
684 d = *dest;
685 a1 = EXTRACT(a, 0) ^ r;
686 a2 = EXTRACT(a, 1) ^ r;
687 new = INSERT(a2, 1) | a1;
688 a1 = EXTRACT(a, 2) ^ r;
689 a2 = EXTRACT(a, 3) ^ r;
690 a1 = q[a1];
691 a2 = q[a2];
692 new = new | INSERT(a1, 2) | INSERT(a2, 3);
693 a1 = EXTRACT(a, 4) ^ r;
694 a2 = EXTRACT(a, 5) ^ r;
695 a1 = q[a1];
696 a2 = q[a2];
697 new = new | INSERT(a1, 4) | INSERT(a2, 5);
698 a1 = EXTRACT(a, 5) ^ r;
699 a2 = EXTRACT(a, 6) ^ r;
700 a1 = q[a1];
701 a2 = q[a2];
702 new = new | INSERT(a1, 5) | INSERT(a2, 6);
703 #if RF_LONGSHIFT > 2
704 a1 = EXTRACT(a, 7) ^ r;
705 a2 = EXTRACT(a, 8) ^ r;
706 a1 = q[a1];
707 a2 = q[a2];
708 new = new | INSERT(a1, 7) | INSERT(a2, 8);
709 a1 = EXTRACT(a, 9) ^ r;
710 a2 = EXTRACT(a, 10) ^ r;
711 a1 = q[a1];
712 a2 = q[a2];
713 new = new | INSERT(a1, 9) | INSERT(a2, 10);
714 a1 = EXTRACT(a, 11) ^ r;
715 a2 = EXTRACT(a, 12) ^ r;
716 a1 = q[a1];
717 a2 = q[a2];
718 new = new | INSERT(a1, 11) | INSERT(a2, 12);
719 #endif /* RF_LONGSHIFT > 2 */
720 d ^= new;
721 *dest++ = d;
722 length--;
723 }
724 }
725 /*
726 compute
727
728 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
729
730 on a five bit basis.
731 optimization: compute old ^ new on 64 bit basis.
732
733 length in bytes.
734 */
735
736 static void
737 QDelta(
738 char *dest,
739 char *obuf,
740 char *nbuf,
741 unsigned length,
742 unsigned char coeff)
743 {
744 unsigned long a, d, new;
745 unsigned long a1, a2;
746 unsigned int *q = &(rf_qfor[28 - coeff][0]);
747 unsigned int r = rf_rn[coeff + 1];
748
749 r = a1 = a2 = new = d = a = 0; /* XXX for now... */
750 q = NULL; /* XXX for now */
751
752 #ifdef _KERNEL
753 /* PQ in kernel currently not supported because the encoding/decoding
754 * table is not present */
755 bzero(dest, length);
756 #else /* KERNEL */
757 /* this code probably doesn't work and should be rewritten -wvcii */
758 /* 13 5 bit quants in a 64 bit word */
759 length /= 8;
760 while (length) {
761 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
762 a ^= *nbuf++;
763 d = *dest;
764 a1 = EXTRACT(a, 0) ^ r;
765 a2 = EXTRACT(a, 1) ^ r;
766 a1 = q[a1];
767 a2 = q[a2];
768 new = INSERT(a2, 1) | a1;
769 a1 = EXTRACT(a, 2) ^ r;
770 a2 = EXTRACT(a, 3) ^ r;
771 a1 = q[a1];
772 a2 = q[a2];
773 new = new | INSERT(a1, 2) | INSERT(a2, 3);
774 a1 = EXTRACT(a, 4) ^ r;
775 a2 = EXTRACT(a, 5) ^ r;
776 a1 = q[a1];
777 a2 = q[a2];
778 new = new | INSERT(a1, 4) | INSERT(a2, 5);
779 a1 = EXTRACT(a, 5) ^ r;
780 a2 = EXTRACT(a, 6) ^ r;
781 a1 = q[a1];
782 a2 = q[a2];
783 new = new | INSERT(a1, 5) | INSERT(a2, 6);
784 #if RF_LONGSHIFT > 2
785 a1 = EXTRACT(a, 7) ^ r;
786 a2 = EXTRACT(a, 8) ^ r;
787 a1 = q[a1];
788 a2 = q[a2];
789 new = new | INSERT(a1, 7) | INSERT(a2, 8);
790 a1 = EXTRACT(a, 9) ^ r;
791 a2 = EXTRACT(a, 10) ^ r;
792 a1 = q[a1];
793 a2 = q[a2];
794 new = new | INSERT(a1, 9) | INSERT(a2, 10);
795 a1 = EXTRACT(a, 11) ^ r;
796 a2 = EXTRACT(a, 12) ^ r;
797 a1 = q[a1];
798 a2 = q[a2];
799 new = new | INSERT(a1, 11) | INSERT(a2, 12);
800 #endif /* RF_LONGSHIFT > 2 */
801 d ^= new;
802 *dest++ = d;
803 length--;
804 }
805 #endif /* _KERNEL */
806 }
807 /*
808 recover columns a and b from the given p and q into
809 bufs abuf and bbuf. All bufs are word aligned.
810 Length is in bytes.
811 */
812
813
814 /*
815 * XXX
816 *
817 * Everything about this seems wrong.
818 */
819 void
820 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
821 unsigned long *pbuf;
822 unsigned long *qbuf;
823 unsigned long *abuf;
824 unsigned long *bbuf;
825 unsigned length;
826 unsigned coeff_a;
827 unsigned coeff_b;
828 {
829 unsigned long p, q, a, a0, a1;
830 int col = (29 * coeff_a) + coeff_b;
831 unsigned char *q0 = &(rf_qinv[col][0]);
832
833 length /= 8;
834 while (length) {
835 p = *pbuf++;
836 q = *qbuf++;
837 a0 = EXTRACT(p, 0);
838 a1 = EXTRACT(q, 0);
839 a = q0[a0 << 5 | a1];
840 #define MF(i) \
841 a0 = EXTRACT(p,i); \
842 a1 = EXTRACT(q,i); \
843 a = a | INSERT(q0[a0<<5 | a1],i)
844
845 MF(1);
846 MF(2);
847 MF(3);
848 MF(4);
849 MF(5);
850 MF(6);
851 #if 0
852 MF(7);
853 MF(8);
854 MF(9);
855 MF(10);
856 MF(11);
857 MF(12);
858 #endif /* 0 */
859 *abuf++ = a;
860 *bbuf++ = a ^ p;
861 length--;
862 }
863 }
/*
   Lost parity and a data column. Recover that data column.
   Assume col coeff is lost. Let q the contents of Q after
   all surviving data columns have been q-xored out of it.
   Then we have the equation

   q[28-coeff][a_i ^ r_i+1] = q

   but q is cyclic with period 31.
   So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
      q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .

   so a_i = r_{coeff+1} ^ q[3+coeff][q]

   The routine is passed q buffer and the buffer
   the data is to be recovered into. They can be the same.
*/
881
882
883
884 static void
885 rf_InvertQ(
886 unsigned long *qbuf,
887 unsigned long *abuf,
888 unsigned length,
889 unsigned coeff)
890 {
891 unsigned long a, new;
892 unsigned long a1, a2;
893 unsigned int *q = &(rf_qfor[3 + coeff][0]);
894 unsigned r = rf_rn[coeff + 1];
895
896 /* 13 5 bit quants in a 64 bit word */
897 length /= 8;
898 while (length) {
899 a = *qbuf++;
900 a1 = EXTRACT(a, 0);
901 a2 = EXTRACT(a, 1);
902 a1 = r ^ q[a1];
903 a2 = r ^ q[a2];
904 new = INSERT(a2, 1) | a1;
905 #define M(i,j) \
906 a1 = EXTRACT(a,i); \
907 a2 = EXTRACT(a,j); \
908 a1 = r ^ q[a1]; \
909 a2 = r ^ q[a2]; \
910 new = new | INSERT(a1,i) | INSERT(a2,j)
911
912 M(2, 3);
913 M(4, 5);
914 M(5, 6);
915 #if RF_LONGSHIFT > 2
916 M(7, 8);
917 M(9, 10);
918 M(11, 12);
919 #endif /* RF_LONGSHIFT > 2 */
920 *abuf++ = new;
921 length--;
922 }
923 }
924 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
925 * (RF_INCLUDE_RAID6 > 0) */
926