rf_dagfuncs.c revision 1.11 1 /* $NetBSD: rf_dagfuncs.c,v 1.11 2002/11/18 23:46:28 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * dagfuncs.c -- DAG node execution routines
31 *
32 * Rules:
33 * 1. Every DAG execution function must eventually cause node->status to
34 * get set to "good" or "bad", and "FinishNode" to be called. In the
35 * case of nodes that complete immediately (xor, NullNodeFunc, etc),
36 * the node execution function can do these two things directly. In
37 * the case of nodes that have to wait for some event (a disk read to
38 * complete, a lock to be released, etc) to occur before they can
39 * complete, this is typically achieved by having whatever module
40 * is doing the operation call GenericWakeupFunc upon completion.
41 * 2. DAG execution functions should check the status in the DAG header
42 * and NOP out their operations if the status is not "enable". However,
43 * execution functions that release resources must be sure to release
44 * them even when they NOP out the function that would use them.
45 * Functions that acquire resources should go ahead and acquire them
46 * even when they NOP, so that a downstream release node will not have
47 * to check to find out whether or not the acquire was suppressed.
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.11 2002/11/18 23:46:28 oster Exp $");
52
53 #include <sys/param.h>
54 #include <sys/ioctl.h>
55
56 #include "rf_archs.h"
57 #include "rf_raid.h"
58 #include "rf_dag.h"
59 #include "rf_layout.h"
60 #include "rf_etimer.h"
61 #include "rf_acctrace.h"
62 #include "rf_diskqueue.h"
63 #include "rf_dagfuncs.h"
64 #include "rf_general.h"
65 #include "rf_engine.h"
66 #include "rf_dagutils.h"
67
68 #include "rf_kintf.h"
69
70 #if RF_INCLUDE_PARITYLOGGING > 0
71 #include "rf_paritylog.h"
72 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
73
74 int (*rf_DiskReadFunc) (RF_DagNode_t *);
75 int (*rf_DiskWriteFunc) (RF_DagNode_t *);
76 int (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
77 int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
78 int (*rf_DiskUnlockFunc) (RF_DagNode_t *);
79 int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
80 int (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
81 int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
82 int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);
83
84 /*****************************************************************************************
85 * main (only) configuration routine for this module
86 ****************************************************************************************/
87 int
88 rf_ConfigureDAGFuncs(listp)
89 RF_ShutdownList_t **listp;
90 {
91 RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
92 rf_DiskReadFunc = rf_DiskReadFuncForThreads;
93 rf_DiskReadUndoFunc = rf_DiskUndoFunc;
94 rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
95 rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
96 rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
97 rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
98 rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
99 rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
100 rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
101 return (0);
102 }
103
104
105
106 /*****************************************************************************************
107 * the execution function associated with a terminate node
108 ****************************************************************************************/
109 int
110 rf_TerminateFunc(node)
111 RF_DagNode_t *node;
112 {
113 RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
114 node->status = rf_good;
115 return (rf_FinishNode(node, RF_THREAD_CONTEXT));
116 }
117
118 int
119 rf_TerminateUndoFunc(node)
120 RF_DagNode_t *node;
121 {
122 return (0);
123 }
124
125
126 /*****************************************************************************************
127 * execution functions associated with a mirror node
128 *
129 * parameters:
130 *
131 * 0 - physical disk addres of data
132 * 1 - buffer for holding read data
133 * 2 - parity stripe ID
134 * 3 - flags
135 * 4 - physical disk address of mirror (parity)
136 *
137 ****************************************************************************************/
138
139 int
140 rf_DiskReadMirrorIdleFunc(node)
141 RF_DagNode_t *node;
142 {
143 /* select the mirror copy with the shortest queue and fill in node
144 * parameters with physical disk address */
145
146 rf_SelectMirrorDiskIdle(node);
147 return (rf_DiskReadFunc(node));
148 }
149
150 #if (RF_INCLUDE_CHAINDECLUSTER > 0) || (RF_INCLUDE_INTERDECLUSTER > 0) || (RF_DEBUG_VALIDATE_DAG > 0)
151 int
152 rf_DiskReadMirrorPartitionFunc(node)
153 RF_DagNode_t *node;
154 {
155 /* select the mirror copy with the shortest queue and fill in node
156 * parameters with physical disk address */
157
158 rf_SelectMirrorDiskPartition(node);
159 return (rf_DiskReadFunc(node));
160 }
161 #endif
162
163 int
164 rf_DiskReadMirrorUndoFunc(node)
165 RF_DagNode_t *node;
166 {
167 return (0);
168 }
169
170
171
172 #if RF_INCLUDE_PARITYLOGGING > 0
173 /*****************************************************************************************
174 * the execution function associated with a parity log update node
175 ****************************************************************************************/
176 int
177 rf_ParityLogUpdateFunc(node)
178 RF_DagNode_t *node;
179 {
180 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
181 caddr_t buf = (caddr_t) node->params[1].p;
182 RF_ParityLogData_t *logData;
183 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
184 RF_Etimer_t timer;
185
186 if (node->dagHdr->status == rf_enable) {
187 RF_ETIMER_START(timer);
188 logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
189 (RF_Raid_t *) (node->dagHdr->raidPtr),
190 node->wakeFunc, (void *) node,
191 node->dagHdr->tracerec, timer);
192 if (logData)
193 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
194 else {
195 RF_ETIMER_STOP(timer);
196 RF_ETIMER_EVAL(timer);
197 tracerec->plog_us += RF_ETIMER_VAL_US(timer);
198 (node->wakeFunc) (node, ENOMEM);
199 }
200 }
201 return (0);
202 }
203
204
205 /*****************************************************************************************
206 * the execution function associated with a parity log overwrite node
207 ****************************************************************************************/
208 int
209 rf_ParityLogOverwriteFunc(node)
210 RF_DagNode_t *node;
211 {
212 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
213 caddr_t buf = (caddr_t) node->params[1].p;
214 RF_ParityLogData_t *logData;
215 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
216 RF_Etimer_t timer;
217
218 if (node->dagHdr->status == rf_enable) {
219 RF_ETIMER_START(timer);
220 logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr),
221 node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer);
222 if (logData)
223 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
224 else {
225 RF_ETIMER_STOP(timer);
226 RF_ETIMER_EVAL(timer);
227 tracerec->plog_us += RF_ETIMER_VAL_US(timer);
228 (node->wakeFunc) (node, ENOMEM);
229 }
230 }
231 return (0);
232 }
233
234 int
235 rf_ParityLogUpdateUndoFunc(node)
236 RF_DagNode_t *node;
237 {
238 return (0);
239 }
240
241 int
242 rf_ParityLogOverwriteUndoFunc(node)
243 RF_DagNode_t *node;
244 {
245 return (0);
246 }
247 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
248
249 /*****************************************************************************************
250 * the execution function associated with a NOP node
251 ****************************************************************************************/
252 int
253 rf_NullNodeFunc(node)
254 RF_DagNode_t *node;
255 {
256 node->status = rf_good;
257 return (rf_FinishNode(node, RF_THREAD_CONTEXT));
258 }
259
260 int
261 rf_NullNodeUndoFunc(node)
262 RF_DagNode_t *node;
263 {
264 node->status = rf_undone;
265 return (rf_FinishNode(node, RF_THREAD_CONTEXT));
266 }
267
268
269 /*****************************************************************************************
270 * the execution function associated with a disk-read node
271 ****************************************************************************************/
272 int
273 rf_DiskReadFuncForThreads(node)
274 RF_DagNode_t *node;
275 {
276 RF_DiskQueueData_t *req;
277 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
278 caddr_t buf = (caddr_t) node->params[1].p;
279 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
280 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
281 unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
282 unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
283 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
284 RF_DiskQueueDataFlags_t flags = 0;
285 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
286 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
287 void *b_proc = NULL;
288
289 if (node->dagHdr->bp)
290 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
291
292 RF_ASSERT(!(lock && unlock));
293 flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
294 flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
295
296 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
297 buf, parityStripeID, which_ru,
298 (int (*) (void *, int)) node->wakeFunc,
299 node, NULL, node->dagHdr->tracerec,
300 (void *) (node->dagHdr->raidPtr), flags, b_proc);
301 if (!req) {
302 (node->wakeFunc) (node, ENOMEM);
303 } else {
304 node->dagFuncData = (void *) req;
305 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
306 }
307 return (0);
308 }
309
310
311 /*****************************************************************************************
312 * the execution function associated with a disk-write node
313 ****************************************************************************************/
314 int
315 rf_DiskWriteFuncForThreads(node)
316 RF_DagNode_t *node;
317 {
318 RF_DiskQueueData_t *req;
319 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
320 caddr_t buf = (caddr_t) node->params[1].p;
321 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
322 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
323 unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
324 unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
325 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
326 RF_DiskQueueDataFlags_t flags = 0;
327 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
328 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
329 void *b_proc = NULL;
330
331 if (node->dagHdr->bp)
332 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
333
334 /* normal processing (rollaway or forward recovery) begins here */
335 RF_ASSERT(!(lock && unlock));
336 flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
337 flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
338 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
339 buf, parityStripeID, which_ru,
340 (int (*) (void *, int)) node->wakeFunc,
341 (void *) node, NULL,
342 node->dagHdr->tracerec,
343 (void *) (node->dagHdr->raidPtr),
344 flags, b_proc);
345
346 if (!req) {
347 (node->wakeFunc) (node, ENOMEM);
348 } else {
349 node->dagFuncData = (void *) req;
350 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
351 }
352
353 return (0);
354 }
355 /*****************************************************************************************
356 * the undo function for disk nodes
357 * Note: this is not a proper undo of a write node, only locks are released.
358 * old data is not restored to disk!
359 ****************************************************************************************/
360 int
361 rf_DiskUndoFunc(node)
362 RF_DagNode_t *node;
363 {
364 RF_DiskQueueData_t *req;
365 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
366 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
367
368 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
369 0L, 0, NULL, 0L, 0,
370 (int (*) (void *, int)) node->wakeFunc,
371 (void *) node,
372 NULL, node->dagHdr->tracerec,
373 (void *) (node->dagHdr->raidPtr),
374 RF_UNLOCK_DISK_QUEUE, NULL);
375 if (!req)
376 (node->wakeFunc) (node, ENOMEM);
377 else {
378 node->dagFuncData = (void *) req;
379 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY);
380 }
381
382 return (0);
383 }
384 /*****************************************************************************************
385 * the execution function associated with an "unlock disk queue" node
386 ****************************************************************************************/
387 int
388 rf_DiskUnlockFuncForThreads(node)
389 RF_DagNode_t *node;
390 {
391 RF_DiskQueueData_t *req;
392 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
393 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
394
395 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
396 0L, 0, NULL, 0L, 0,
397 (int (*) (void *, int)) node->wakeFunc,
398 (void *) node,
399 NULL, node->dagHdr->tracerec,
400 (void *) (node->dagHdr->raidPtr),
401 RF_UNLOCK_DISK_QUEUE, NULL);
402 if (!req)
403 (node->wakeFunc) (node, ENOMEM);
404 else {
405 node->dagFuncData = (void *) req;
406 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY);
407 }
408
409 return (0);
410 }
411 /*****************************************************************************************
412 * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes,
413 * the routine is called to set the node status and inform the execution engine that
414 * the node has fired.
415 ****************************************************************************************/
416 int
417 rf_GenericWakeupFunc(node, status)
418 RF_DagNode_t *node;
419 int status;
420 {
421 switch (node->status) {
422 case rf_bwd1:
423 node->status = rf_bwd2;
424 if (node->dagFuncData)
425 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
426 return (rf_DiskWriteFuncForThreads(node));
427 break;
428 case rf_fired:
429 if (status)
430 node->status = rf_bad;
431 else
432 node->status = rf_good;
433 break;
434 case rf_recover:
435 /* probably should never reach this case */
436 if (status)
437 node->status = rf_panic;
438 else
439 node->status = rf_undone;
440 break;
441 default:
442 printf("rf_GenericWakeupFunc:");
443 printf("node->status is %d,", node->status);
444 printf("status is %d \n", status);
445 RF_PANIC();
446 break;
447 }
448 if (node->dagFuncData)
449 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
450 return (rf_FinishNode(node, RF_INTR_CONTEXT));
451 }
452
453
454 /*****************************************************************************************
455 * there are three distinct types of xor nodes
456 * A "regular xor" is used in the fault-free case where the access spans a complete
457 * stripe unit. It assumes that the result buffer is one full stripe unit in size,
458 * and uses the stripe-unit-offset values that it computes from the PDAs to determine
459 * where within the stripe unit to XOR each argument buffer.
460 *
461 * A "simple xor" is used in the fault-free case where the access touches only a portion
462 * of one (or two, in some cases) stripe unit(s). It assumes that all the argument
463 * buffers are of the same size and have the same stripe unit offset.
464 *
465 * A "recovery xor" is used in the degraded-mode case. It's similar to the regular
466 * xor function except that it takes the failed PDA as an additional parameter, and
467 * uses it to determine what portions of the argument buffers need to be xor'd into
468 * the result buffer, and where in the result buffer they should go.
469 ****************************************************************************************/
470
471 /* xor the params together and store the result in the result field.
472 * assume the result field points to a buffer that is the size of one SU,
473 * and use the pda params to determine where within the buffer to XOR
474 * the input buffers.
475 */
476 int
477 rf_RegularXorFunc(node)
478 RF_DagNode_t *node;
479 {
480 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
481 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
482 RF_Etimer_t timer;
483 int i, retcode;
484
485 retcode = 0;
486 if (node->dagHdr->status == rf_enable) {
487 /* don't do the XOR if the input is the same as the output */
488 RF_ETIMER_START(timer);
489 for (i = 0; i < node->numParams - 1; i += 2)
490 if (node->params[i + 1].p != node->results[0]) {
491 retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
492 (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp);
493 }
494 RF_ETIMER_STOP(timer);
495 RF_ETIMER_EVAL(timer);
496 tracerec->xor_us += RF_ETIMER_VAL_US(timer);
497 }
498 return (rf_GenericWakeupFunc(node, retcode)); /* call wake func
499 * explicitly since no
500 * I/O in this node */
501 }
502 /* xor the inputs into the result buffer, ignoring placement issues */
503 int
504 rf_SimpleXorFunc(node)
505 RF_DagNode_t *node;
506 {
507 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
508 int i, retcode = 0;
509 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
510 RF_Etimer_t timer;
511
512 if (node->dagHdr->status == rf_enable) {
513 RF_ETIMER_START(timer);
514 /* don't do the XOR if the input is the same as the output */
515 for (i = 0; i < node->numParams - 1; i += 2)
516 if (node->params[i + 1].p != node->results[0]) {
517 retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0],
518 rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector),
519 (struct buf *) node->dagHdr->bp);
520 }
521 RF_ETIMER_STOP(timer);
522 RF_ETIMER_EVAL(timer);
523 tracerec->xor_us += RF_ETIMER_VAL_US(timer);
524 }
525 return (rf_GenericWakeupFunc(node, retcode)); /* call wake func
526 * explicitly since no
527 * I/O in this node */
528 }
529 /* this xor is used by the degraded-mode dag functions to recover lost data.
530 * the second-to-last parameter is the PDA for the failed portion of the access.
531 * the code here looks at this PDA and assumes that the xor target buffer is
532 * equal in size to the number of sectors in the failed PDA. It then uses
533 * the other PDAs in the parameter list to determine where within the target
534 * buffer the corresponding data should be xored.
535 */
536 int
537 rf_RecoveryXorFunc(node)
538 RF_DagNode_t *node;
539 {
540 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
541 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
542 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
543 int i, retcode = 0;
544 RF_PhysDiskAddr_t *pda;
545 int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
546 char *srcbuf, *destbuf;
547 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
548 RF_Etimer_t timer;
549
550 if (node->dagHdr->status == rf_enable) {
551 RF_ETIMER_START(timer);
552 for (i = 0; i < node->numParams - 2; i += 2)
553 if (node->params[i + 1].p != node->results[0]) {
554 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
555 srcbuf = (char *) node->params[i + 1].p;
556 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
557 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
558 retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp);
559 }
560 RF_ETIMER_STOP(timer);
561 RF_ETIMER_EVAL(timer);
562 tracerec->xor_us += RF_ETIMER_VAL_US(timer);
563 }
564 return (rf_GenericWakeupFunc(node, retcode));
565 }
566 /*****************************************************************************************
567 * The next three functions are utilities used by the above xor-execution functions.
568 ****************************************************************************************/
569
570
571 /*
572 * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit
573 * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the
574 * access described by pda is one SU in size (which by implication means it's SU-aligned),
575 * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one
576 * SU in size the XOR occurs on only the portion of targbuf identified in the pda.
577 */
578
579 int
580 rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp)
581 RF_Raid_t *raidPtr;
582 RF_PhysDiskAddr_t *pda;
583 char *srcbuf;
584 char *targbuf;
585 void *bp;
586 {
587 char *targptr;
588 int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
589 int SUOffset = pda->startSector % sectPerSU;
590 int length, retcode = 0;
591
592 RF_ASSERT(pda->numSector <= sectPerSU);
593
594 targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
595 length = rf_RaidAddressToByte(raidPtr, pda->numSector);
596 retcode = rf_bxor(srcbuf, targptr, length, bp);
597 return (retcode);
598 }
599 /* it really should be the case that the buffer pointers (returned by malloc)
600 * are aligned to the natural word size of the machine, so this is the only
601 * case we optimize for. The length should always be a multiple of the sector
602 * size, so there should be no problem with leftover bytes at the end.
603 */
604 int
605 rf_bxor(src, dest, len, bp)
606 char *src;
607 char *dest;
608 int len;
609 void *bp;
610 {
611 unsigned mask = sizeof(long) - 1, retcode = 0;
612
613 if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) {
614 retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp);
615 } else {
616 RF_ASSERT(0);
617 }
618 return (retcode);
619 }
620 /* map a user buffer into kernel space, if necessary */
621 #define REMAP_VA(_bp,x,y) (y) = (x)
622
623 /* When XORing in kernel mode, we need to map each user page to kernel space before we can access it.
624 * We don't want to assume anything about which input buffers are in kernel/user
625 * space, nor about their alignment, so in each loop we compute the maximum number
626 * of bytes that we can xor without crossing any page boundaries, and do only this many
627 * bytes before the next remap.
628 */
629 int
630 rf_longword_bxor(src, dest, len, bp)
631 unsigned long *src;
632 unsigned long *dest;
633 int len; /* longwords */
634 void *bp;
635 {
636 unsigned long *end = src + len;
637 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */
638 unsigned long *pg_src, *pg_dest; /* per-page source/dest
639 * pointers */
640 int longs_this_time;/* # longwords to xor in the current iteration */
641
642 REMAP_VA(bp, src, pg_src);
643 REMAP_VA(bp, dest, pg_dest);
644 if (!pg_src || !pg_dest)
645 return (EFAULT);
646
647 while (len >= 4) {
648 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */
649 src += longs_this_time;
650 dest += longs_this_time;
651 len -= longs_this_time;
652 while (longs_this_time >= 4) {
653 d0 = pg_dest[0];
654 d1 = pg_dest[1];
655 d2 = pg_dest[2];
656 d3 = pg_dest[3];
657 s0 = pg_src[0];
658 s1 = pg_src[1];
659 s2 = pg_src[2];
660 s3 = pg_src[3];
661 pg_dest[0] = d0 ^ s0;
662 pg_dest[1] = d1 ^ s1;
663 pg_dest[2] = d2 ^ s2;
664 pg_dest[3] = d3 ^ s3;
665 pg_src += 4;
666 pg_dest += 4;
667 longs_this_time -= 4;
668 }
669 while (longs_this_time > 0) { /* cannot cross any page
670 * boundaries here */
671 *pg_dest++ ^= *pg_src++;
672 longs_this_time--;
673 }
674
675 /* either we're done, or we've reached a page boundary on one
676 * (or possibly both) of the pointers */
677 if (len) {
678 if (RF_PAGE_ALIGNED(src))
679 REMAP_VA(bp, src, pg_src);
680 if (RF_PAGE_ALIGNED(dest))
681 REMAP_VA(bp, dest, pg_dest);
682 if (!pg_src || !pg_dest)
683 return (EFAULT);
684 }
685 }
686 while (src < end) {
687 *pg_dest++ ^= *pg_src++;
688 src++;
689 dest++;
690 len--;
691 if (RF_PAGE_ALIGNED(src))
692 REMAP_VA(bp, src, pg_src);
693 if (RF_PAGE_ALIGNED(dest))
694 REMAP_VA(bp, dest, pg_dest);
695 }
696 RF_ASSERT(len == 0);
697 return (0);
698 }
699
700 #if 0
701 /*
702 dst = a ^ b ^ c;
703 a may equal dst
704 see comment above longword_bxor
705 */
706 int
707 rf_longword_bxor3(dst, a, b, c, len, bp)
708 unsigned long *dst;
709 unsigned long *a;
710 unsigned long *b;
711 unsigned long *c;
712 int len; /* length in longwords */
713 void *bp;
714 {
715 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
716 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest
717 * pointers */
718 int longs_this_time;/* # longs to xor in the current iteration */
719 char dst_is_a = 0;
720
721 REMAP_VA(bp, a, pg_a);
722 REMAP_VA(bp, b, pg_b);
723 REMAP_VA(bp, c, pg_c);
724 if (a == dst) {
725 pg_dst = pg_a;
726 dst_is_a = 1;
727 } else {
728 REMAP_VA(bp, dst, pg_dst);
729 }
730
731 /* align dest to cache line. Can't cross a pg boundary on dst here. */
732 while ((((unsigned long) pg_dst) & 0x1f)) {
733 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
734 dst++;
735 a++;
736 b++;
737 c++;
738 if (RF_PAGE_ALIGNED(a)) {
739 REMAP_VA(bp, a, pg_a);
740 if (!pg_a)
741 return (EFAULT);
742 }
743 if (RF_PAGE_ALIGNED(b)) {
744 REMAP_VA(bp, a, pg_b);
745 if (!pg_b)
746 return (EFAULT);
747 }
748 if (RF_PAGE_ALIGNED(c)) {
749 REMAP_VA(bp, a, pg_c);
750 if (!pg_c)
751 return (EFAULT);
752 }
753 len--;
754 }
755
756 while (len > 4) {
757 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT);
758 a += longs_this_time;
759 b += longs_this_time;
760 c += longs_this_time;
761 dst += longs_this_time;
762 len -= longs_this_time;
763 while (longs_this_time >= 4) {
764 a0 = pg_a[0];
765 longs_this_time -= 4;
766
767 a1 = pg_a[1];
768 a2 = pg_a[2];
769
770 a3 = pg_a[3];
771 pg_a += 4;
772
773 b0 = pg_b[0];
774 b1 = pg_b[1];
775
776 b2 = pg_b[2];
777 b3 = pg_b[3];
778 /* start dual issue */
779 a0 ^= b0;
780 b0 = pg_c[0];
781
782 pg_b += 4;
783 a1 ^= b1;
784
785 a2 ^= b2;
786 a3 ^= b3;
787
788 b1 = pg_c[1];
789 a0 ^= b0;
790
791 b2 = pg_c[2];
792 a1 ^= b1;
793
794 b3 = pg_c[3];
795 a2 ^= b2;
796
797 pg_dst[0] = a0;
798 a3 ^= b3;
799 pg_dst[1] = a1;
800 pg_c += 4;
801 pg_dst[2] = a2;
802 pg_dst[3] = a3;
803 pg_dst += 4;
804 }
805 while (longs_this_time > 0) { /* cannot cross any page
806 * boundaries here */
807 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
808 longs_this_time--;
809 }
810
811 if (len) {
812 if (RF_PAGE_ALIGNED(a)) {
813 REMAP_VA(bp, a, pg_a);
814 if (!pg_a)
815 return (EFAULT);
816 if (dst_is_a)
817 pg_dst = pg_a;
818 }
819 if (RF_PAGE_ALIGNED(b)) {
820 REMAP_VA(bp, b, pg_b);
821 if (!pg_b)
822 return (EFAULT);
823 }
824 if (RF_PAGE_ALIGNED(c)) {
825 REMAP_VA(bp, c, pg_c);
826 if (!pg_c)
827 return (EFAULT);
828 }
829 if (!dst_is_a)
830 if (RF_PAGE_ALIGNED(dst)) {
831 REMAP_VA(bp, dst, pg_dst);
832 if (!pg_dst)
833 return (EFAULT);
834 }
835 }
836 }
837 while (len) {
838 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
839 dst++;
840 a++;
841 b++;
842 c++;
843 if (RF_PAGE_ALIGNED(a)) {
844 REMAP_VA(bp, a, pg_a);
845 if (!pg_a)
846 return (EFAULT);
847 if (dst_is_a)
848 pg_dst = pg_a;
849 }
850 if (RF_PAGE_ALIGNED(b)) {
851 REMAP_VA(bp, b, pg_b);
852 if (!pg_b)
853 return (EFAULT);
854 }
855 if (RF_PAGE_ALIGNED(c)) {
856 REMAP_VA(bp, c, pg_c);
857 if (!pg_c)
858 return (EFAULT);
859 }
860 if (!dst_is_a)
861 if (RF_PAGE_ALIGNED(dst)) {
862 REMAP_VA(bp, dst, pg_dst);
863 if (!pg_dst)
864 return (EFAULT);
865 }
866 len--;
867 }
868 return (0);
869 }
870
871 int
872 rf_bxor3(dst, a, b, c, len, bp)
873 unsigned char *dst;
874 unsigned char *a;
875 unsigned char *b;
876 unsigned char *c;
877 unsigned long len;
878 void *bp;
879 {
880 RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0);
881
882 return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
883 (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp));
884 }
885 #endif
886