rf_pq.c revision 1.4 1 /* $NetBSD: rf_pq.c,v 1.4 1999/08/13 03:41:57 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * Code for RAID level 6 (P + Q) disk array architecture.
31 */
32
33 #include "rf_archs.h"
34 #include "rf_types.h"
35 #include "rf_raid.h"
36 #include "rf_dag.h"
37 #include "rf_dagffrd.h"
38 #include "rf_dagffwr.h"
39 #include "rf_dagdegrd.h"
40 #include "rf_dagdegwr.h"
41 #include "rf_dagutils.h"
42 #include "rf_dagfuncs.h"
43 #include "rf_threadid.h"
44 #include "rf_etimer.h"
45 #include "rf_pqdeg.h"
46 #include "rf_general.h"
47 #include "rf_map.h"
48 #include "rf_pq.h"
49
50 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
51 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
52
53 int
54 rf_RegularONPFunc(node)
55 RF_DagNode_t *node;
56 {
57 return (rf_RegularXorFunc(node));
58 }
59 /*
60 same as simpleONQ func, but the coefficient is always 1
61 */
62
63 int
64 rf_SimpleONPFunc(node)
65 RF_DagNode_t *node;
66 {
67 return (rf_SimpleXorFunc(node));
68 }
69
70 int
71 rf_RecoveryPFunc(node)
72 RF_DagNode_t *node;
73 {
74 return (rf_RecoveryXorFunc(node));
75 }
76
77 int
78 rf_RegularPFunc(node)
79 RF_DagNode_t *node;
80 {
81 return (rf_RegularXorFunc(node));
82 }
83 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
84
/* Forward declarations of the file-local Q helpers defined further down. */
static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
    unsigned char coeff);
static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
    unsigned length, unsigned coeff);
91
92 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
93 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
94 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
95
96 void
97 rf_PQDagSelect(
98 RF_Raid_t * raidPtr,
99 RF_IoType_t type,
100 RF_AccessStripeMap_t * asmap,
101 RF_VoidFuncPtr * createFunc)
102 {
103 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
104 unsigned ndfail = asmap->numDataFailed;
105 unsigned npfail = asmap->numParityFailed;
106 unsigned ntfail = npfail + ndfail;
107
108 RF_ASSERT(RF_IO_IS_R_OR_W(type));
109 if (ntfail > 2) {
110 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
111 /* *infoFunc = */ *createFunc = NULL;
112 return;
113 }
114 /* ok, we can do this I/O */
115 if (type == RF_IO_TYPE_READ) {
116 switch (ndfail) {
117 case 0:
118 /* fault free read */
119 *createFunc = rf_CreateFaultFreeReadDAG; /* same as raid 5 */
120 break;
121 case 1:
122 /* lost a single data unit */
123 /* two cases: (1) parity is not lost. do a normal raid
124 * 5 reconstruct read. (2) parity is lost. do a
125 * reconstruct read using "q". */
126 if (ntfail == 2) { /* also lost redundancy */
127 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
128 *createFunc = rf_PQ_110_CreateReadDAG;
129 else
130 *createFunc = rf_PQ_101_CreateReadDAG;
131 } else {
132 /* P and Q are ok. But is there a failure in
133 * some unaccessed data unit? */
134 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
135 *createFunc = rf_PQ_200_CreateReadDAG;
136 else
137 *createFunc = rf_PQ_100_CreateReadDAG;
138 }
139 break;
140 case 2:
141 /* lost two data units */
142 /* *infoFunc = PQOneTwo; */
143 *createFunc = rf_PQ_200_CreateReadDAG;
144 break;
145 }
146 return;
147 }
148 /* a write */
149 switch (ntfail) {
150 case 0: /* fault free */
151 if (rf_suppressLocksAndLargeWrites ||
152 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
153 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
154
155 *createFunc = rf_PQCreateSmallWriteDAG;
156 } else {
157 *createFunc = rf_PQCreateLargeWriteDAG;
158 }
159 break;
160
161 case 1: /* single disk fault */
162 if (npfail == 1) {
163 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
164 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
165 * normal mode raid5
166 * write. */
167 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
168 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
169 *createFunc = rf_PQ_001_CreateSmallWriteDAG;
170 else
171 *createFunc = rf_PQ_001_CreateLargeWriteDAG;
172 } else {/* parity died, small write only updating Q */
173 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
174 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
175 *createFunc = rf_PQ_010_CreateSmallWriteDAG;
176 else
177 *createFunc = rf_PQ_010_CreateLargeWriteDAG;
178 }
179 } else { /* data missing. Do a P reconstruct write if
180 * only a single data unit is lost in the
181 * stripe, otherwise a PQ reconstruct write. */
182 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
183 *createFunc = rf_PQ_200_CreateWriteDAG;
184 else
185 *createFunc = rf_PQ_100_CreateWriteDAG;
186 }
187 break;
188
189 case 2: /* two disk faults */
190 switch (npfail) {
191 case 2: /* both p and q dead */
192 *createFunc = rf_PQ_011_CreateWriteDAG;
193 break;
194 case 1: /* either p or q and dead data */
195 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
196 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
197 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
198 *createFunc = rf_PQ_101_CreateWriteDAG;
199 else
200 *createFunc = rf_PQ_110_CreateWriteDAG;
201 break;
202 case 0: /* double data loss */
203 *createFunc = rf_PQ_200_CreateWriteDAG;
204 break;
205 }
206 break;
207
208 default: /* more than 2 disk faults */
209 *createFunc = NULL;
210 RF_PANIC();
211 }
212 return;
213 }
214 /*
215 Used as a stop gap info function
216 */
217 static void
218 PQOne(raidPtr, nSucc, nAnte, asmap)
219 RF_Raid_t *raidPtr;
220 int *nSucc;
221 int *nAnte;
222 RF_AccessStripeMap_t *asmap;
223 {
224 *nSucc = *nAnte = 1;
225 }
226
227 static void
228 PQOneTwo(raidPtr, nSucc, nAnte, asmap)
229 RF_Raid_t *raidPtr;
230 int *nSucc;
231 int *nAnte;
232 RF_AccessStripeMap_t *asmap;
233 {
234 *nSucc = 1;
235 *nAnte = 2;
236 }
237 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
238 {
239 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
240 rf_RegularPQFunc, RF_FALSE);
241 }
242
243 int
244 rf_RegularONQFunc(node)
245 RF_DagNode_t *node;
246 {
247 int np = node->numParams;
248 int d;
249 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
250 int i;
251 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
252 RF_Etimer_t timer;
253 char *qbuf, *qpbuf;
254 char *obuf, *nbuf;
255 RF_PhysDiskAddr_t *old, *new;
256 unsigned long coeff;
257 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
258
259 RF_ETIMER_START(timer);
260
261 d = (np - 3) / 4;
262 RF_ASSERT(4 * d + 3 == np);
263 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
264 for (i = 0; i < d; i++) {
265 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
266 obuf = (char *) node->params[2 * i + 1].p;
267 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
268 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
269 RF_ASSERT(new->numSector == old->numSector);
270 RF_ASSERT(new->raidAddress == old->raidAddress);
271 /* the stripe unit within the stripe tells us the coefficient
272 * to use for the multiply. */
273 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
274 /* compute the data unit offset within the column, then add
275 * one */
276 coeff = (coeff % raidPtr->Layout.numDataCol);
277 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
278 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
279 }
280
281 RF_ETIMER_STOP(timer);
282 RF_ETIMER_EVAL(timer);
283 tracerec->q_us += RF_ETIMER_VAL_US(timer);
284 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
285 * I/O in this node */
286 return (0);
287 }
288 /*
289 See the SimpleXORFunc for the difference between a simple and regular func.
290 These Q functions should be used for
291
292 new q = Q(data,old data,old q)
293
294 style updates and not for
295
296 q = ( new data, new data, .... )
297
298 computations.
299
The simple q takes 2(2d+1)+1 = 4d+3 params, where d is the number
of (old data, new data) pairs being written. The order of params is
[0 .. 2d-1] old data pda_0, old data buffer_0, ... old data pda_(d-1), old data buffer_(d-1)
[2d] old q pda, [2d+1] old q buffer
[2d+2 .. 4d+1] new data pda_0, new data buffer_0, ... new data pda_(d-1), new data buffer_(d-1)
[4d+2] raidPtr
306 */
307
308 int
309 rf_SimpleONQFunc(node)
310 RF_DagNode_t *node;
311 {
312 int np = node->numParams;
313 int d;
314 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
315 int i;
316 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
317 RF_Etimer_t timer;
318 char *qbuf;
319 char *obuf, *nbuf;
320 RF_PhysDiskAddr_t *old, *new;
321 unsigned long coeff;
322
323 RF_ETIMER_START(timer);
324
325 d = (np - 3) / 4;
326 RF_ASSERT(4 * d + 3 == np);
327 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
328 for (i = 0; i < d; i++) {
329 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
330 obuf = (char *) node->params[2 * i + 1].p;
331 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
332 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
333 RF_ASSERT(new->numSector == old->numSector);
334 RF_ASSERT(new->raidAddress == old->raidAddress);
335 /* the stripe unit within the stripe tells us the coefficient
336 * to use for the multiply. */
337 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
338 /* compute the data unit offset within the column, then add
339 * one */
340 coeff = (coeff % raidPtr->Layout.numDataCol);
341 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
342 }
343
344 RF_ETIMER_STOP(timer);
345 RF_ETIMER_EVAL(timer);
346 tracerec->q_us += RF_ETIMER_VAL_US(timer);
347 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
348 * I/O in this node */
349 return (0);
350 }
351 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
352 {
353 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
354 }
355
356 static void
357 RegularQSubr(node, qbuf)
358 RF_DagNode_t *node;
359 char *qbuf;
360 {
361 int np = node->numParams;
362 int d;
363 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
364 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
365 int i;
366 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
367 RF_Etimer_t timer;
368 char *obuf, *qpbuf;
369 RF_PhysDiskAddr_t *old;
370 unsigned long coeff;
371
372 RF_ETIMER_START(timer);
373
374 d = (np - 1) / 2;
375 RF_ASSERT(2 * d + 1 == np);
376 for (i = 0; i < d; i++) {
377 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
378 obuf = (char *) node->params[2 * i + 1].p;
379 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
380 /* compute the data unit offset within the column, then add
381 * one */
382 coeff = (coeff % raidPtr->Layout.numDataCol);
383 /* the input buffers may not all be aligned with the start of
384 * the stripe. so shift by their sector offset within the
385 * stripe unit */
386 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
387 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
388 }
389
390 RF_ETIMER_STOP(timer);
391 RF_ETIMER_EVAL(timer);
392 tracerec->q_us += RF_ETIMER_VAL_US(timer);
393 }
394 /*
395 used in degraded writes.
396 */
397
398 static void
399 DegrQSubr(node)
400 RF_DagNode_t *node;
401 {
402 int np = node->numParams;
403 int d;
404 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
405 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
406 int i;
407 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
408 RF_Etimer_t timer;
409 char *qbuf = node->results[1];
410 char *obuf, *qpbuf;
411 RF_PhysDiskAddr_t *old;
412 unsigned long coeff;
413 unsigned fail_start;
414 int j;
415
416 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
417 fail_start = old->startSector % secPerSU;
418
419 RF_ETIMER_START(timer);
420
421 d = (np - 2) / 2;
422 RF_ASSERT(2 * d + 2 == np);
423 for (i = 0; i < d; i++) {
424 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
425 obuf = (char *) node->params[2 * i + 1].p;
426 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
427 /* compute the data unit offset within the column, then add
428 * one */
429 coeff = (coeff % raidPtr->Layout.numDataCol);
430 /* the input buffers may not all be aligned with the start of
431 * the stripe. so shift by their sector offset within the
432 * stripe unit */
433 j = old->startSector % secPerSU;
434 RF_ASSERT(j >= fail_start);
435 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
436 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
437 }
438
439 RF_ETIMER_STOP(timer);
440 RF_ETIMER_EVAL(timer);
441 tracerec->q_us += RF_ETIMER_VAL_US(timer);
442 }
443 /*
444 Called by large write code to compute the new parity and the new q.
445
446 structure of the params:
447
pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol )
449 raidPtr
450
451 for a total of 2d+1 arguments.
452 The result buffers results[0], results[1] are the buffers for the p and q,
453 respectively.
454
455 We compute Q first, then compute P. The P calculation may try to reuse
456 one of the input buffers for its output, so if we computed P first, we would
457 corrupt the input for the q calculation.
458 */
459
460 int
461 rf_RegularPQFunc(node)
462 RF_DagNode_t *node;
463 {
464 RegularQSubr(node, node->results[1]);
465 return (rf_RegularXorFunc(node)); /* does the wakeup */
466 }
467
468 int
469 rf_RegularQFunc(node)
470 RF_DagNode_t *node;
471 {
472 /* Almost ... adjust Qsubr args */
473 RegularQSubr(node, node->results[0]);
474 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
475 * I/O in this node */
476 return (0);
477 }
478 /*
479 Called by singly degraded write code to compute the new parity and the new q.
480
481 structure of the params:
482
483 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
484 failedPDA raidPtr
485
486 for a total of 2d+2 arguments.
487 The result buffers results[0], results[1] are the buffers for the parity and q,
488 respectively.
489
490 We compute Q first, then compute parity. The parity calculation may try to reuse
491 one of the input buffers for its output, so if we computed parity first, we would
492 corrupt the input for the q calculation.
493
494 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
495 */
496
497 void
498 rf_Degraded_100_PQFunc(node)
499 RF_DagNode_t *node;
500 {
501 int np = node->numParams;
502
503 RF_ASSERT(np >= 2);
504 DegrQSubr(node);
505 rf_RecoveryXorFunc(node);
506 }
507
508
509 /*
510 The two below are used when reading a stripe with a single lost data unit.
511 The parameters are
512
513 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
514
515 and results[0] contains the data buffer. Which is originally zero-filled.
516
517 */
518
519 /* this Q func is used by the degraded-mode dag functions to recover lost data.
520 * the second-to-last parameter is the PDA for the failed portion of the access.
521 * the code here looks at this PDA and assumes that the xor target buffer is
522 * equal in size to the number of sectors in the failed PDA. It then uses
523 * the other PDAs in the parameter list to determine where within the target
524 * buffer the corresponding data should be xored.
525 *
526 * Recall the basic equation is
527 *
528 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
529 *
530 * so to recover data_j we need
531 *
532 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
533 *
534 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
535 * copying Q into it. Then we need to do a table lookup to convert to solve
536 * data_j /= J
537 *
538 *
539 */
540 int
541 rf_RecoveryQFunc(node)
542 RF_DagNode_t *node;
543 {
544 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
545 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
546 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
547 int i;
548 RF_PhysDiskAddr_t *pda;
549 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
550 char *srcbuf, *destbuf;
551 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
552 RF_Etimer_t timer;
553 unsigned long coeff;
554
555 RF_ETIMER_START(timer);
556 /* start by copying Q into the buffer */
557 bcopy(node->params[node->numParams - 3].p, node->results[0],
558 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
559 for (i = 0; i < node->numParams - 4; i += 2) {
560 RF_ASSERT(node->params[i + 1].p != node->results[0]);
561 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
562 srcbuf = (char *) node->params[i + 1].p;
563 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
564 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
565 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
566 /* compute the data unit offset within the column */
567 coeff = (coeff % raidPtr->Layout.numDataCol);
568 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
569 }
570 /* Do the nasty inversion now */
571 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
572 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
573 RF_ETIMER_STOP(timer);
574 RF_ETIMER_EVAL(timer);
575 tracerec->q_us += RF_ETIMER_VAL_US(timer);
576 rf_GenericWakeupFunc(node, 0);
577 return (0);
578 }
579
580 int
581 rf_RecoveryPQFunc(node)
582 RF_DagNode_t *node;
583 {
584 RF_PANIC();
585 return (1);
586 }
587 /*
588 Degraded write Q subroutine.
589 Used when P is dead.
590 Large-write style Q computation.
591 Parameters
592
593 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
594
595 We ignore failedPDA.
596
597 This is a "simple style" recovery func.
598 */
599
600 void
601 rf_PQ_DegradedWriteQFunc(node)
602 RF_DagNode_t *node;
603 {
604 int np = node->numParams;
605 int d;
606 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
607 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
608 int i;
609 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
610 RF_Etimer_t timer;
611 char *qbuf = node->results[0];
612 char *obuf, *qpbuf;
613 RF_PhysDiskAddr_t *old;
614 unsigned long coeff;
615 int fail_start, j;
616
617 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
618 fail_start = old->startSector % secPerSU;
619
620 RF_ETIMER_START(timer);
621
622 d = (np - 2) / 2;
623 RF_ASSERT(2 * d + 2 == np);
624
625 for (i = 0; i < d; i++) {
626 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
627 obuf = (char *) node->params[2 * i + 1].p;
628 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
629 /* compute the data unit offset within the column, then add
630 * one */
631 coeff = (coeff % raidPtr->Layout.numDataCol);
632 j = old->startSector % secPerSU;
633 RF_ASSERT(j >= fail_start);
634 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
635 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
636 }
637
638 RF_ETIMER_STOP(timer);
639 RF_ETIMER_EVAL(timer);
640 tracerec->q_us += RF_ETIMER_VAL_US(timer);
641 rf_GenericWakeupFunc(node, 0);
642 }
643
644
645
646
647 /* Q computations */
648
/*
 coeff - column;

 compute dest ^= qfor[28-coeff][rn[coeff+1] ^ a]

 on a 5-bit basis;
 length is in bytes.
*/
657
658 void
659 rf_IncQ(dest, buf, length, coeff)
660 unsigned long *dest;
661 unsigned long *buf;
662 unsigned length;
663 unsigned coeff;
664 {
665 unsigned long a, d, new;
666 unsigned long a1, a2;
667 unsigned int *q = &(rf_qfor[28 - coeff][0]);
668 unsigned r = rf_rn[coeff + 1];
669
670 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
671 #define INSERT(a,i) (a << (5L*i))
672
673 length /= 8;
674 /* 13 5 bit quants in a 64 bit word */
675 while (length) {
676 a = *buf++;
677 d = *dest;
678 a1 = EXTRACT(a, 0) ^ r;
679 a2 = EXTRACT(a, 1) ^ r;
680 new = INSERT(a2, 1) | a1;
681 a1 = EXTRACT(a, 2) ^ r;
682 a2 = EXTRACT(a, 3) ^ r;
683 a1 = q[a1];
684 a2 = q[a2];
685 new = new | INSERT(a1, 2) | INSERT(a2, 3);
686 a1 = EXTRACT(a, 4) ^ r;
687 a2 = EXTRACT(a, 5) ^ r;
688 a1 = q[a1];
689 a2 = q[a2];
690 new = new | INSERT(a1, 4) | INSERT(a2, 5);
691 a1 = EXTRACT(a, 5) ^ r;
692 a2 = EXTRACT(a, 6) ^ r;
693 a1 = q[a1];
694 a2 = q[a2];
695 new = new | INSERT(a1, 5) | INSERT(a2, 6);
696 #if RF_LONGSHIFT > 2
697 a1 = EXTRACT(a, 7) ^ r;
698 a2 = EXTRACT(a, 8) ^ r;
699 a1 = q[a1];
700 a2 = q[a2];
701 new = new | INSERT(a1, 7) | INSERT(a2, 8);
702 a1 = EXTRACT(a, 9) ^ r;
703 a2 = EXTRACT(a, 10) ^ r;
704 a1 = q[a1];
705 a2 = q[a2];
706 new = new | INSERT(a1, 9) | INSERT(a2, 10);
707 a1 = EXTRACT(a, 11) ^ r;
708 a2 = EXTRACT(a, 12) ^ r;
709 a1 = q[a1];
710 a2 = q[a2];
711 new = new | INSERT(a1, 11) | INSERT(a2, 12);
712 #endif /* RF_LONGSHIFT > 2 */
713 d ^= new;
714 *dest++ = d;
715 length--;
716 }
717 }
718 /*
719 compute
720
721 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
722
723 on a five bit basis.
724 optimization: compute old ^ new on 64 bit basis.
725
726 length in bytes.
727 */
728
729 static void
730 QDelta(
731 char *dest,
732 char *obuf,
733 char *nbuf,
734 unsigned length,
735 unsigned char coeff)
736 {
737 unsigned long a, d, new;
738 unsigned long a1, a2;
739 unsigned int *q = &(rf_qfor[28 - coeff][0]);
740 unsigned r = rf_rn[coeff + 1];
741
742 #ifdef _KERNEL
743 /* PQ in kernel currently not supported because the encoding/decoding
744 * table is not present */
745 bzero(dest, length);
746 #else /* KERNEL */
747 /* this code probably doesn't work and should be rewritten -wvcii */
748 /* 13 5 bit quants in a 64 bit word */
749 length /= 8;
750 while (length) {
751 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
752 a ^= *nbuf++;
753 d = *dest;
754 a1 = EXTRACT(a, 0) ^ r;
755 a2 = EXTRACT(a, 1) ^ r;
756 a1 = q[a1];
757 a2 = q[a2];
758 new = INSERT(a2, 1) | a1;
759 a1 = EXTRACT(a, 2) ^ r;
760 a2 = EXTRACT(a, 3) ^ r;
761 a1 = q[a1];
762 a2 = q[a2];
763 new = new | INSERT(a1, 2) | INSERT(a2, 3);
764 a1 = EXTRACT(a, 4) ^ r;
765 a2 = EXTRACT(a, 5) ^ r;
766 a1 = q[a1];
767 a2 = q[a2];
768 new = new | INSERT(a1, 4) | INSERT(a2, 5);
769 a1 = EXTRACT(a, 5) ^ r;
770 a2 = EXTRACT(a, 6) ^ r;
771 a1 = q[a1];
772 a2 = q[a2];
773 new = new | INSERT(a1, 5) | INSERT(a2, 6);
774 #if RF_LONGSHIFT > 2
775 a1 = EXTRACT(a, 7) ^ r;
776 a2 = EXTRACT(a, 8) ^ r;
777 a1 = q[a1];
778 a2 = q[a2];
779 new = new | INSERT(a1, 7) | INSERT(a2, 8);
780 a1 = EXTRACT(a, 9) ^ r;
781 a2 = EXTRACT(a, 10) ^ r;
782 a1 = q[a1];
783 a2 = q[a2];
784 new = new | INSERT(a1, 9) | INSERT(a2, 10);
785 a1 = EXTRACT(a, 11) ^ r;
786 a2 = EXTRACT(a, 12) ^ r;
787 a1 = q[a1];
788 a2 = q[a2];
789 new = new | INSERT(a1, 11) | INSERT(a2, 12);
790 #endif /* RF_LONGSHIFT > 2 */
791 d ^= new;
792 *dest++ = d;
793 length--;
794 }
795 #endif /* _KERNEL */
796 }
797 /*
798 recover columns a and b from the given p and q into
799 bufs abuf and bbuf. All bufs are word aligned.
800 Length is in bytes.
801 */
802
803
804 /*
805 * XXX
806 *
807 * Everything about this seems wrong.
808 */
809 void
810 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
811 unsigned long *pbuf;
812 unsigned long *qbuf;
813 unsigned long *abuf;
814 unsigned long *bbuf;
815 unsigned length;
816 unsigned coeff_a;
817 unsigned coeff_b;
818 {
819 unsigned long p, q, a, a0, a1;
820 int col = (29 * coeff_a) + coeff_b;
821 unsigned char *q0 = &(rf_qinv[col][0]);
822
823 length /= 8;
824 while (length) {
825 p = *pbuf++;
826 q = *qbuf++;
827 a0 = EXTRACT(p, 0);
828 a1 = EXTRACT(q, 0);
829 a = q0[a0 << 5 | a1];
830 #define MF(i) \
831 a0 = EXTRACT(p,i); \
832 a1 = EXTRACT(q,i); \
833 a = a | INSERT(q0[a0<<5 | a1],i)
834
835 MF(1);
836 MF(2);
837 MF(3);
838 MF(4);
839 MF(5);
840 MF(6);
841 #if 0
842 MF(7);
843 MF(8);
844 MF(9);
845 MF(10);
846 MF(11);
847 MF(12);
848 #endif /* 0 */
849 *abuf++ = a;
850 *bbuf++ = a ^ p;
851 length--;
852 }
853 }
854 /*
855 Lost parity and a data column. Recover that data column.
856 Assume col coeff is lost. Let q the contents of Q after
857 all surviving data columns have been q-xored out of it.
858 Then we have the equation
859
860 q[28-coeff][a_i ^ r_i+1] = q
861
862 but q is cyclic with period 31.
863 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
864 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
865
866 so a_i = r_{coeff+1} ^ q[3+coeff][q]
867
 The routine is passed the q buffer and the buffer
 the data is to be recovered into. They can be the same.
870 */
871
872
873
874 static void
875 rf_InvertQ(
876 unsigned long *qbuf,
877 unsigned long *abuf,
878 unsigned length,
879 unsigned coeff)
880 {
881 unsigned long a, new;
882 unsigned long a1, a2;
883 unsigned int *q = &(rf_qfor[3 + coeff][0]);
884 unsigned r = rf_rn[coeff + 1];
885
886 /* 13 5 bit quants in a 64 bit word */
887 length /= 8;
888 while (length) {
889 a = *qbuf++;
890 a1 = EXTRACT(a, 0);
891 a2 = EXTRACT(a, 1);
892 a1 = r ^ q[a1];
893 a2 = r ^ q[a2];
894 new = INSERT(a2, 1) | a1;
895 #define M(i,j) \
896 a1 = EXTRACT(a,i); \
897 a2 = EXTRACT(a,j); \
898 a1 = r ^ q[a1]; \
899 a2 = r ^ q[a2]; \
900 new = new | INSERT(a1,i) | INSERT(a2,j)
901
902 M(2, 3);
903 M(4, 5);
904 M(5, 6);
905 #if RF_LONGSHIFT > 2
906 M(7, 8);
907 M(9, 10);
908 M(11, 12);
909 #endif /* RF_LONGSHIFT > 2 */
910 *abuf++ = new;
911 length--;
912 }
913 }
914 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
915 * (RF_INCLUDE_RAID6 > 0) */
916