rf_pq.c revision 1.3 1 /* $NetBSD: rf_pq.c,v 1.3 1999/02/05 00:06:14 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * Code for RAID level 6 (P + Q) disk array architecture.
31 */
32
33 #include "rf_archs.h"
34 #include "rf_types.h"
35 #include "rf_raid.h"
36 #include "rf_dag.h"
37 #include "rf_dagffrd.h"
38 #include "rf_dagffwr.h"
39 #include "rf_dagdegrd.h"
40 #include "rf_dagdegwr.h"
41 #include "rf_dagutils.h"
42 #include "rf_dagfuncs.h"
43 #include "rf_threadid.h"
44 #include "rf_etimer.h"
45 #include "rf_pqdeg.h"
46 #include "rf_general.h"
47 #include "rf_map.h"
48 #include "rf_pq.h"
49 #include "rf_sys.h"
50
51 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
52 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
53
54 int
55 rf_RegularONPFunc(node)
56 RF_DagNode_t *node;
57 {
58 return (rf_RegularXorFunc(node));
59 }
60 /*
61 same as simpleONQ func, but the coefficient is always 1
62 */
63
64 int
65 rf_SimpleONPFunc(node)
66 RF_DagNode_t *node;
67 {
68 return (rf_SimpleXorFunc(node));
69 }
70
71 int
72 rf_RecoveryPFunc(node)
73 RF_DagNode_t *node;
74 {
75 return (rf_RecoveryXorFunc(node));
76 }
77
78 int
79 rf_RegularPFunc(node)
80 RF_DagNode_t *node;
81 {
82 return (rf_RegularXorFunc(node));
83 }
84 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
85
/* Forward declarations of the file-local Q helpers defined later in this file. */
static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length, unsigned char coeff);
static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length, unsigned coeff);
92
93 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
94 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
95 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
96
97 void
98 rf_PQDagSelect(
99 RF_Raid_t * raidPtr,
100 RF_IoType_t type,
101 RF_AccessStripeMap_t * asmap,
102 RF_VoidFuncPtr * createFunc)
103 {
104 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
105 unsigned ndfail = asmap->numDataFailed;
106 unsigned npfail = asmap->numParityFailed;
107 unsigned ntfail = npfail + ndfail;
108
109 RF_ASSERT(RF_IO_IS_R_OR_W(type));
110 if (ntfail > 2) {
111 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
112 /* *infoFunc = */ *createFunc = NULL;
113 return;
114 }
115 /* ok, we can do this I/O */
116 if (type == RF_IO_TYPE_READ) {
117 switch (ndfail) {
118 case 0:
119 /* fault free read */
120 *createFunc = rf_CreateFaultFreeReadDAG; /* same as raid 5 */
121 break;
122 case 1:
123 /* lost a single data unit */
124 /* two cases: (1) parity is not lost. do a normal raid
125 * 5 reconstruct read. (2) parity is lost. do a
126 * reconstruct read using "q". */
127 if (ntfail == 2) { /* also lost redundancy */
128 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
129 *createFunc = rf_PQ_110_CreateReadDAG;
130 else
131 *createFunc = rf_PQ_101_CreateReadDAG;
132 } else {
133 /* P and Q are ok. But is there a failure in
134 * some unaccessed data unit? */
135 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
136 *createFunc = rf_PQ_200_CreateReadDAG;
137 else
138 *createFunc = rf_PQ_100_CreateReadDAG;
139 }
140 break;
141 case 2:
142 /* lost two data units */
143 /* *infoFunc = PQOneTwo; */
144 *createFunc = rf_PQ_200_CreateReadDAG;
145 break;
146 }
147 return;
148 }
149 /* a write */
150 switch (ntfail) {
151 case 0: /* fault free */
152 if (rf_suppressLocksAndLargeWrites ||
153 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
154 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
155
156 *createFunc = rf_PQCreateSmallWriteDAG;
157 } else {
158 *createFunc = rf_PQCreateLargeWriteDAG;
159 }
160 break;
161
162 case 1: /* single disk fault */
163 if (npfail == 1) {
164 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
165 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
166 * normal mode raid5
167 * write. */
168 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
169 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
170 *createFunc = rf_PQ_001_CreateSmallWriteDAG;
171 else
172 *createFunc = rf_PQ_001_CreateLargeWriteDAG;
173 } else {/* parity died, small write only updating Q */
174 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
175 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
176 *createFunc = rf_PQ_010_CreateSmallWriteDAG;
177 else
178 *createFunc = rf_PQ_010_CreateLargeWriteDAG;
179 }
180 } else { /* data missing. Do a P reconstruct write if
181 * only a single data unit is lost in the
182 * stripe, otherwise a PQ reconstruct write. */
183 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
184 *createFunc = rf_PQ_200_CreateWriteDAG;
185 else
186 *createFunc = rf_PQ_100_CreateWriteDAG;
187 }
188 break;
189
190 case 2: /* two disk faults */
191 switch (npfail) {
192 case 2: /* both p and q dead */
193 *createFunc = rf_PQ_011_CreateWriteDAG;
194 break;
195 case 1: /* either p or q and dead data */
196 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
197 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
198 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
199 *createFunc = rf_PQ_101_CreateWriteDAG;
200 else
201 *createFunc = rf_PQ_110_CreateWriteDAG;
202 break;
203 case 0: /* double data loss */
204 *createFunc = rf_PQ_200_CreateWriteDAG;
205 break;
206 }
207 break;
208
209 default: /* more than 2 disk faults */
210 *createFunc = NULL;
211 RF_PANIC();
212 }
213 return;
214 }
215 /*
216 Used as a stop gap info function
217 */
218 static void
219 PQOne(raidPtr, nSucc, nAnte, asmap)
220 RF_Raid_t *raidPtr;
221 int *nSucc;
222 int *nAnte;
223 RF_AccessStripeMap_t *asmap;
224 {
225 *nSucc = *nAnte = 1;
226 }
227
228 static void
229 PQOneTwo(raidPtr, nSucc, nAnte, asmap)
230 RF_Raid_t *raidPtr;
231 int *nSucc;
232 int *nAnte;
233 RF_AccessStripeMap_t *asmap;
234 {
235 *nSucc = 1;
236 *nAnte = 2;
237 }
238 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
239 {
240 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
241 rf_RegularPQFunc, RF_FALSE);
242 }
243
244 int
245 rf_RegularONQFunc(node)
246 RF_DagNode_t *node;
247 {
248 int np = node->numParams;
249 int d;
250 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
251 int i;
252 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
253 RF_Etimer_t timer;
254 char *qbuf, *qpbuf;
255 char *obuf, *nbuf;
256 RF_PhysDiskAddr_t *old, *new;
257 unsigned long coeff;
258 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
259
260 RF_ETIMER_START(timer);
261
262 d = (np - 3) / 4;
263 RF_ASSERT(4 * d + 3 == np);
264 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
265 for (i = 0; i < d; i++) {
266 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
267 obuf = (char *) node->params[2 * i + 1].p;
268 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
269 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
270 RF_ASSERT(new->numSector == old->numSector);
271 RF_ASSERT(new->raidAddress == old->raidAddress);
272 /* the stripe unit within the stripe tells us the coefficient
273 * to use for the multiply. */
274 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
275 /* compute the data unit offset within the column, then add
276 * one */
277 coeff = (coeff % raidPtr->Layout.numDataCol);
278 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
279 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
280 }
281
282 RF_ETIMER_STOP(timer);
283 RF_ETIMER_EVAL(timer);
284 tracerec->q_us += RF_ETIMER_VAL_US(timer);
285 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
286 * I/O in this node */
287 return (0);
288 }
289 /*
290 See the SimpleXORFunc for the difference between a simple and regular func.
291 These Q functions should be used for
292
293 new q = Q(data,old data,old q)
294
295 style updates and not for
296
297 q = ( new data, new data, .... )
298
299 computations.
300
The simple q takes 2(2d+1)+1 params, where d is the number
of stripes written. The order of params is
[0]    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_(d-1), old data buffer_(d-1)
[2d]   old q pda_0, old q buffer
[2d+2] new data pda_0, new data buffer_0, ... new data pda_(d-1), new data buffer_(d-1)
[4d+2] raidPtr
307 */
308
309 int
310 rf_SimpleONQFunc(node)
311 RF_DagNode_t *node;
312 {
313 int np = node->numParams;
314 int d;
315 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
316 int i;
317 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
318 RF_Etimer_t timer;
319 char *qbuf;
320 char *obuf, *nbuf;
321 RF_PhysDiskAddr_t *old, *new;
322 unsigned long coeff;
323
324 RF_ETIMER_START(timer);
325
326 d = (np - 3) / 4;
327 RF_ASSERT(4 * d + 3 == np);
328 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
329 for (i = 0; i < d; i++) {
330 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
331 obuf = (char *) node->params[2 * i + 1].p;
332 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
333 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
334 RF_ASSERT(new->numSector == old->numSector);
335 RF_ASSERT(new->raidAddress == old->raidAddress);
336 /* the stripe unit within the stripe tells us the coefficient
337 * to use for the multiply. */
338 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
339 /* compute the data unit offset within the column, then add
340 * one */
341 coeff = (coeff % raidPtr->Layout.numDataCol);
342 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
343 }
344
345 RF_ETIMER_STOP(timer);
346 RF_ETIMER_EVAL(timer);
347 tracerec->q_us += RF_ETIMER_VAL_US(timer);
348 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
349 * I/O in this node */
350 return (0);
351 }
352 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
353 {
354 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
355 }
356
357 static void
358 RegularQSubr(node, qbuf)
359 RF_DagNode_t *node;
360 char *qbuf;
361 {
362 int np = node->numParams;
363 int d;
364 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
365 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
366 int i;
367 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
368 RF_Etimer_t timer;
369 char *obuf, *qpbuf;
370 RF_PhysDiskAddr_t *old;
371 unsigned long coeff;
372
373 RF_ETIMER_START(timer);
374
375 d = (np - 1) / 2;
376 RF_ASSERT(2 * d + 1 == np);
377 for (i = 0; i < d; i++) {
378 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
379 obuf = (char *) node->params[2 * i + 1].p;
380 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
381 /* compute the data unit offset within the column, then add
382 * one */
383 coeff = (coeff % raidPtr->Layout.numDataCol);
384 /* the input buffers may not all be aligned with the start of
385 * the stripe. so shift by their sector offset within the
386 * stripe unit */
387 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
388 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
389 }
390
391 RF_ETIMER_STOP(timer);
392 RF_ETIMER_EVAL(timer);
393 tracerec->q_us += RF_ETIMER_VAL_US(timer);
394 }
395 /*
396 used in degraded writes.
397 */
398
399 static void
400 DegrQSubr(node)
401 RF_DagNode_t *node;
402 {
403 int np = node->numParams;
404 int d;
405 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
406 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
407 int i;
408 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
409 RF_Etimer_t timer;
410 char *qbuf = node->results[1];
411 char *obuf, *qpbuf;
412 RF_PhysDiskAddr_t *old;
413 unsigned long coeff;
414 unsigned fail_start;
415 int j;
416
417 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
418 fail_start = old->startSector % secPerSU;
419
420 RF_ETIMER_START(timer);
421
422 d = (np - 2) / 2;
423 RF_ASSERT(2 * d + 2 == np);
424 for (i = 0; i < d; i++) {
425 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
426 obuf = (char *) node->params[2 * i + 1].p;
427 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
428 /* compute the data unit offset within the column, then add
429 * one */
430 coeff = (coeff % raidPtr->Layout.numDataCol);
431 /* the input buffers may not all be aligned with the start of
432 * the stripe. so shift by their sector offset within the
433 * stripe unit */
434 j = old->startSector % secPerSU;
435 RF_ASSERT(j >= fail_start);
436 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
437 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
438 }
439
440 RF_ETIMER_STOP(timer);
441 RF_ETIMER_EVAL(timer);
442 tracerec->q_us += RF_ETIMER_VAL_US(timer);
443 }
444 /*
445 Called by large write code to compute the new parity and the new q.
446
447 structure of the params:
448
pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol )
450 raidPtr
451
452 for a total of 2d+1 arguments.
453 The result buffers results[0], results[1] are the buffers for the p and q,
454 respectively.
455
456 We compute Q first, then compute P. The P calculation may try to reuse
457 one of the input buffers for its output, so if we computed P first, we would
458 corrupt the input for the q calculation.
459 */
460
461 int
462 rf_RegularPQFunc(node)
463 RF_DagNode_t *node;
464 {
465 RegularQSubr(node, node->results[1]);
466 return (rf_RegularXorFunc(node)); /* does the wakeup */
467 }
468
469 int
470 rf_RegularQFunc(node)
471 RF_DagNode_t *node;
472 {
473 /* Almost ... adjust Qsubr args */
474 RegularQSubr(node, node->results[0]);
475 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
476 * I/O in this node */
477 return (0);
478 }
479 /*
480 Called by singly degraded write code to compute the new parity and the new q.
481
482 structure of the params:
483
484 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
485 failedPDA raidPtr
486
487 for a total of 2d+2 arguments.
488 The result buffers results[0], results[1] are the buffers for the parity and q,
489 respectively.
490
491 We compute Q first, then compute parity. The parity calculation may try to reuse
492 one of the input buffers for its output, so if we computed parity first, we would
493 corrupt the input for the q calculation.
494
495 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
496 */
497
498 void
499 rf_Degraded_100_PQFunc(node)
500 RF_DagNode_t *node;
501 {
502 int np = node->numParams;
503
504 RF_ASSERT(np >= 2);
505 DegrQSubr(node);
506 rf_RecoveryXorFunc(node);
507 }
508
509
510 /*
511 The two below are used when reading a stripe with a single lost data unit.
512 The parameters are
513
514 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
515
516 and results[0] contains the data buffer. Which is originally zero-filled.
517
518 */
519
520 /* this Q func is used by the degraded-mode dag functions to recover lost data.
521 * the second-to-last parameter is the PDA for the failed portion of the access.
522 * the code here looks at this PDA and assumes that the xor target buffer is
523 * equal in size to the number of sectors in the failed PDA. It then uses
524 * the other PDAs in the parameter list to determine where within the target
525 * buffer the corresponding data should be xored.
526 *
527 * Recall the basic equation is
528 *
529 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
530 *
531 * so to recover data_j we need
532 *
533 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
534 *
535 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
536 * copying Q into it. Then we need to do a table lookup to convert to solve
537 * data_j /= J
538 *
539 *
540 */
541 int
542 rf_RecoveryQFunc(node)
543 RF_DagNode_t *node;
544 {
545 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
546 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
547 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
548 int i;
549 RF_PhysDiskAddr_t *pda;
550 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
551 char *srcbuf, *destbuf;
552 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
553 RF_Etimer_t timer;
554 unsigned long coeff;
555
556 RF_ETIMER_START(timer);
557 /* start by copying Q into the buffer */
558 bcopy(node->params[node->numParams - 3].p, node->results[0],
559 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
560 for (i = 0; i < node->numParams - 4; i += 2) {
561 RF_ASSERT(node->params[i + 1].p != node->results[0]);
562 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
563 srcbuf = (char *) node->params[i + 1].p;
564 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
565 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
566 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
567 /* compute the data unit offset within the column */
568 coeff = (coeff % raidPtr->Layout.numDataCol);
569 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
570 }
571 /* Do the nasty inversion now */
572 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
573 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
574 RF_ETIMER_STOP(timer);
575 RF_ETIMER_EVAL(timer);
576 tracerec->q_us += RF_ETIMER_VAL_US(timer);
577 rf_GenericWakeupFunc(node, 0);
578 return (0);
579 }
580
581 int
582 rf_RecoveryPQFunc(node)
583 RF_DagNode_t *node;
584 {
585 RF_PANIC();
586 return (1);
587 }
588 /*
589 Degraded write Q subroutine.
590 Used when P is dead.
591 Large-write style Q computation.
592 Parameters
593
594 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
595
596 We ignore failedPDA.
597
598 This is a "simple style" recovery func.
599 */
600
601 void
602 rf_PQ_DegradedWriteQFunc(node)
603 RF_DagNode_t *node;
604 {
605 int np = node->numParams;
606 int d;
607 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
608 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
609 int i;
610 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
611 RF_Etimer_t timer;
612 char *qbuf = node->results[0];
613 char *obuf, *qpbuf;
614 RF_PhysDiskAddr_t *old;
615 unsigned long coeff;
616 int fail_start, j;
617
618 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
619 fail_start = old->startSector % secPerSU;
620
621 RF_ETIMER_START(timer);
622
623 d = (np - 2) / 2;
624 RF_ASSERT(2 * d + 2 == np);
625
626 for (i = 0; i < d; i++) {
627 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
628 obuf = (char *) node->params[2 * i + 1].p;
629 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
630 /* compute the data unit offset within the column, then add
631 * one */
632 coeff = (coeff % raidPtr->Layout.numDataCol);
633 j = old->startSector % secPerSU;
634 RF_ASSERT(j >= fail_start);
635 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
636 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
637 }
638
639 RF_ETIMER_STOP(timer);
640 RF_ETIMER_EVAL(timer);
641 tracerec->q_us += RF_ETIMER_VAL_US(timer);
642 rf_GenericWakeupFunc(node, 0);
643 }
644
645
646
647
648 /* Q computations */
649
650 /*
coeff - column;
652
653 compute dest ^= qfor[28-coeff][rn[coeff+1] a]
654
655 on 5-bit basis;
656 length in bytes;
657 */
658
659 void
660 rf_IncQ(dest, buf, length, coeff)
661 unsigned long *dest;
662 unsigned long *buf;
663 unsigned length;
664 unsigned coeff;
665 {
666 unsigned long a, d, new;
667 unsigned long a1, a2;
668 unsigned int *q = &(rf_qfor[28 - coeff][0]);
669 unsigned r = rf_rn[coeff + 1];
670
671 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
672 #define INSERT(a,i) (a << (5L*i))
673
674 length /= 8;
675 /* 13 5 bit quants in a 64 bit word */
676 while (length) {
677 a = *buf++;
678 d = *dest;
679 a1 = EXTRACT(a, 0) ^ r;
680 a2 = EXTRACT(a, 1) ^ r;
681 new = INSERT(a2, 1) | a1;
682 a1 = EXTRACT(a, 2) ^ r;
683 a2 = EXTRACT(a, 3) ^ r;
684 a1 = q[a1];
685 a2 = q[a2];
686 new = new | INSERT(a1, 2) | INSERT(a2, 3);
687 a1 = EXTRACT(a, 4) ^ r;
688 a2 = EXTRACT(a, 5) ^ r;
689 a1 = q[a1];
690 a2 = q[a2];
691 new = new | INSERT(a1, 4) | INSERT(a2, 5);
692 a1 = EXTRACT(a, 5) ^ r;
693 a2 = EXTRACT(a, 6) ^ r;
694 a1 = q[a1];
695 a2 = q[a2];
696 new = new | INSERT(a1, 5) | INSERT(a2, 6);
697 #if RF_LONGSHIFT > 2
698 a1 = EXTRACT(a, 7) ^ r;
699 a2 = EXTRACT(a, 8) ^ r;
700 a1 = q[a1];
701 a2 = q[a2];
702 new = new | INSERT(a1, 7) | INSERT(a2, 8);
703 a1 = EXTRACT(a, 9) ^ r;
704 a2 = EXTRACT(a, 10) ^ r;
705 a1 = q[a1];
706 a2 = q[a2];
707 new = new | INSERT(a1, 9) | INSERT(a2, 10);
708 a1 = EXTRACT(a, 11) ^ r;
709 a2 = EXTRACT(a, 12) ^ r;
710 a1 = q[a1];
711 a2 = q[a2];
712 new = new | INSERT(a1, 11) | INSERT(a2, 12);
713 #endif /* RF_LONGSHIFT > 2 */
714 d ^= new;
715 *dest++ = d;
716 length--;
717 }
718 }
719 /*
720 compute
721
722 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
723
724 on a five bit basis.
725 optimization: compute old ^ new on 64 bit basis.
726
727 length in bytes.
728 */
729
730 static void
731 QDelta(
732 char *dest,
733 char *obuf,
734 char *nbuf,
735 unsigned length,
736 unsigned char coeff)
737 {
738 unsigned long a, d, new;
739 unsigned long a1, a2;
740 unsigned int *q = &(rf_qfor[28 - coeff][0]);
741 unsigned r = rf_rn[coeff + 1];
742
743 #ifdef _KERNEL
744 /* PQ in kernel currently not supported because the encoding/decoding
745 * table is not present */
746 bzero(dest, length);
747 #else /* KERNEL */
748 /* this code probably doesn't work and should be rewritten -wvcii */
749 /* 13 5 bit quants in a 64 bit word */
750 length /= 8;
751 while (length) {
752 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
753 a ^= *nbuf++;
754 d = *dest;
755 a1 = EXTRACT(a, 0) ^ r;
756 a2 = EXTRACT(a, 1) ^ r;
757 a1 = q[a1];
758 a2 = q[a2];
759 new = INSERT(a2, 1) | a1;
760 a1 = EXTRACT(a, 2) ^ r;
761 a2 = EXTRACT(a, 3) ^ r;
762 a1 = q[a1];
763 a2 = q[a2];
764 new = new | INSERT(a1, 2) | INSERT(a2, 3);
765 a1 = EXTRACT(a, 4) ^ r;
766 a2 = EXTRACT(a, 5) ^ r;
767 a1 = q[a1];
768 a2 = q[a2];
769 new = new | INSERT(a1, 4) | INSERT(a2, 5);
770 a1 = EXTRACT(a, 5) ^ r;
771 a2 = EXTRACT(a, 6) ^ r;
772 a1 = q[a1];
773 a2 = q[a2];
774 new = new | INSERT(a1, 5) | INSERT(a2, 6);
775 #if RF_LONGSHIFT > 2
776 a1 = EXTRACT(a, 7) ^ r;
777 a2 = EXTRACT(a, 8) ^ r;
778 a1 = q[a1];
779 a2 = q[a2];
780 new = new | INSERT(a1, 7) | INSERT(a2, 8);
781 a1 = EXTRACT(a, 9) ^ r;
782 a2 = EXTRACT(a, 10) ^ r;
783 a1 = q[a1];
784 a2 = q[a2];
785 new = new | INSERT(a1, 9) | INSERT(a2, 10);
786 a1 = EXTRACT(a, 11) ^ r;
787 a2 = EXTRACT(a, 12) ^ r;
788 a1 = q[a1];
789 a2 = q[a2];
790 new = new | INSERT(a1, 11) | INSERT(a2, 12);
791 #endif /* RF_LONGSHIFT > 2 */
792 d ^= new;
793 *dest++ = d;
794 length--;
795 }
796 #endif /* _KERNEL */
797 }
798 /*
799 recover columns a and b from the given p and q into
800 bufs abuf and bbuf. All bufs are word aligned.
801 Length is in bytes.
802 */
803
804
805 /*
806 * XXX
807 *
808 * Everything about this seems wrong.
809 */
810 void
811 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
812 unsigned long *pbuf;
813 unsigned long *qbuf;
814 unsigned long *abuf;
815 unsigned long *bbuf;
816 unsigned length;
817 unsigned coeff_a;
818 unsigned coeff_b;
819 {
820 unsigned long p, q, a, a0, a1;
821 int col = (29 * coeff_a) + coeff_b;
822 unsigned char *q0 = &(rf_qinv[col][0]);
823
824 length /= 8;
825 while (length) {
826 p = *pbuf++;
827 q = *qbuf++;
828 a0 = EXTRACT(p, 0);
829 a1 = EXTRACT(q, 0);
830 a = q0[a0 << 5 | a1];
831 #define MF(i) \
832 a0 = EXTRACT(p,i); \
833 a1 = EXTRACT(q,i); \
834 a = a | INSERT(q0[a0<<5 | a1],i)
835
836 MF(1);
837 MF(2);
838 MF(3);
839 MF(4);
840 MF(5);
841 MF(6);
842 #if 0
843 MF(7);
844 MF(8);
845 MF(9);
846 MF(10);
847 MF(11);
848 MF(12);
849 #endif /* 0 */
850 *abuf++ = a;
851 *bbuf++ = a ^ p;
852 length--;
853 }
854 }
855 /*
856 Lost parity and a data column. Recover that data column.
857 Assume col coeff is lost. Let q the contents of Q after
858 all surviving data columns have been q-xored out of it.
859 Then we have the equation
860
861 q[28-coeff][a_i ^ r_i+1] = q
862
863 but q is cyclic with period 31.
864 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
865 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
866
867 so a_i = r_{coeff+1} ^ q[3+coeff][q]
868
The routine is passed the q buffer and the buffer
the data is to be recovered into. They can be the same.
871 */
872
873
874
875 static void
876 rf_InvertQ(
877 unsigned long *qbuf,
878 unsigned long *abuf,
879 unsigned length,
880 unsigned coeff)
881 {
882 unsigned long a, new;
883 unsigned long a1, a2;
884 unsigned int *q = &(rf_qfor[3 + coeff][0]);
885 unsigned r = rf_rn[coeff + 1];
886
887 /* 13 5 bit quants in a 64 bit word */
888 length /= 8;
889 while (length) {
890 a = *qbuf++;
891 a1 = EXTRACT(a, 0);
892 a2 = EXTRACT(a, 1);
893 a1 = r ^ q[a1];
894 a2 = r ^ q[a2];
895 new = INSERT(a2, 1) | a1;
896 #define M(i,j) \
897 a1 = EXTRACT(a,i); \
898 a2 = EXTRACT(a,j); \
899 a1 = r ^ q[a1]; \
900 a2 = r ^ q[a2]; \
901 new = new | INSERT(a1,i) | INSERT(a2,j)
902
903 M(2, 3);
904 M(4, 5);
905 M(5, 6);
906 #if RF_LONGSHIFT > 2
907 M(7, 8);
908 M(9, 10);
909 M(11, 12);
910 #endif /* RF_LONGSHIFT > 2 */
911 *abuf++ = new;
912 length--;
913 }
914 }
915 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
916 * (RF_INCLUDE_RAID6 > 0) */
917