/*	$NetBSD: rf_dagfuncs.c,v 1.10 2002/09/21 00:52:49 oster Exp $	*/
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * dagfuncs.c -- DAG node execution routines
31 *
32 * Rules:
33 * 1. Every DAG execution function must eventually cause node->status to
34 * get set to "good" or "bad", and "FinishNode" to be called. In the
35 * case of nodes that complete immediately (xor, NullNodeFunc, etc),
36 * the node execution function can do these two things directly. In
37 * the case of nodes that have to wait for some event (a disk read to
38 * complete, a lock to be released, etc) to occur before they can
39 * complete, this is typically achieved by having whatever module
40 * is doing the operation call GenericWakeupFunc upon completion.
41 * 2. DAG execution functions should check the status in the DAG header
42 * and NOP out their operations if the status is not "enable". However,
43 * execution functions that release resources must be sure to release
44 * them even when they NOP out the function that would use them.
45 * Functions that acquire resources should go ahead and acquire them
46 * even when they NOP, so that a downstream release node will not have
47 * to check to find out whether or not the acquire was suppressed.
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.10 2002/09/21 00:52:49 oster Exp $");
52
53 #include <sys/param.h>
54 #include <sys/ioctl.h>
55
56 #include "rf_archs.h"
57 #include "rf_raid.h"
58 #include "rf_dag.h"
59 #include "rf_layout.h"
60 #include "rf_etimer.h"
61 #include "rf_acctrace.h"
62 #include "rf_diskqueue.h"
63 #include "rf_dagfuncs.h"
64 #include "rf_general.h"
65 #include "rf_engine.h"
66 #include "rf_dagutils.h"
67
68 #include "rf_kintf.h"
69
70 #if RF_INCLUDE_PARITYLOGGING > 0
71 #include "rf_paritylog.h"
72 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
73
74 int (*rf_DiskReadFunc) (RF_DagNode_t *);
75 int (*rf_DiskWriteFunc) (RF_DagNode_t *);
76 int (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
77 int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
78 int (*rf_DiskUnlockFunc) (RF_DagNode_t *);
79 int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
80 int (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
81 int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
82 int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);
83
84 /*****************************************************************************************
85 * main (only) configuration routine for this module
86 ****************************************************************************************/
87 int
88 rf_ConfigureDAGFuncs(listp)
89 RF_ShutdownList_t **listp;
90 {
91 RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
92 rf_DiskReadFunc = rf_DiskReadFuncForThreads;
93 rf_DiskReadUndoFunc = rf_DiskUndoFunc;
94 rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
95 rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
96 rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
97 rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
98 rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
99 rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
100 rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
101 return (0);
102 }
103
104
105
106 /*****************************************************************************************
107 * the execution function associated with a terminate node
108 ****************************************************************************************/
109 int
110 rf_TerminateFunc(node)
111 RF_DagNode_t *node;
112 {
113 RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
114 node->status = rf_good;
115 return (rf_FinishNode(node, RF_THREAD_CONTEXT));
116 }
117
118 int
119 rf_TerminateUndoFunc(node)
120 RF_DagNode_t *node;
121 {
122 return (0);
123 }
124
125
126 /*****************************************************************************************
127 * execution functions associated with a mirror node
128 *
129 * parameters:
130 *
131 * 0 - physical disk addres of data
132 * 1 - buffer for holding read data
133 * 2 - parity stripe ID
134 * 3 - flags
135 * 4 - physical disk address of mirror (parity)
136 *
137 ****************************************************************************************/
138
139 int
140 rf_DiskReadMirrorIdleFunc(node)
141 RF_DagNode_t *node;
142 {
143 /* select the mirror copy with the shortest queue and fill in node
144 * parameters with physical disk address */
145
146 rf_SelectMirrorDiskIdle(node);
147 return (rf_DiskReadFunc(node));
148 }
149
150 int
151 rf_DiskReadMirrorPartitionFunc(node)
152 RF_DagNode_t *node;
153 {
154 /* select the mirror copy with the shortest queue and fill in node
155 * parameters with physical disk address */
156
157 rf_SelectMirrorDiskPartition(node);
158 return (rf_DiskReadFunc(node));
159 }
160
161 int
162 rf_DiskReadMirrorUndoFunc(node)
163 RF_DagNode_t *node;
164 {
165 return (0);
166 }
167
168
169
170 #if RF_INCLUDE_PARITYLOGGING > 0
171 /*****************************************************************************************
172 * the execution function associated with a parity log update node
173 ****************************************************************************************/
174 int
175 rf_ParityLogUpdateFunc(node)
176 RF_DagNode_t *node;
177 {
178 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
179 caddr_t buf = (caddr_t) node->params[1].p;
180 RF_ParityLogData_t *logData;
181 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
182 RF_Etimer_t timer;
183
184 if (node->dagHdr->status == rf_enable) {
185 RF_ETIMER_START(timer);
186 logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
187 (RF_Raid_t *) (node->dagHdr->raidPtr),
188 node->wakeFunc, (void *) node,
189 node->dagHdr->tracerec, timer);
190 if (logData)
191 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
192 else {
193 RF_ETIMER_STOP(timer);
194 RF_ETIMER_EVAL(timer);
195 tracerec->plog_us += RF_ETIMER_VAL_US(timer);
196 (node->wakeFunc) (node, ENOMEM);
197 }
198 }
199 return (0);
200 }
201
202
203 /*****************************************************************************************
204 * the execution function associated with a parity log overwrite node
205 ****************************************************************************************/
206 int
207 rf_ParityLogOverwriteFunc(node)
208 RF_DagNode_t *node;
209 {
210 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
211 caddr_t buf = (caddr_t) node->params[1].p;
212 RF_ParityLogData_t *logData;
213 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
214 RF_Etimer_t timer;
215
216 if (node->dagHdr->status == rf_enable) {
217 RF_ETIMER_START(timer);
218 logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr),
219 node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer);
220 if (logData)
221 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
222 else {
223 RF_ETIMER_STOP(timer);
224 RF_ETIMER_EVAL(timer);
225 tracerec->plog_us += RF_ETIMER_VAL_US(timer);
226 (node->wakeFunc) (node, ENOMEM);
227 }
228 }
229 return (0);
230 }
231
232 int
233 rf_ParityLogUpdateUndoFunc(node)
234 RF_DagNode_t *node;
235 {
236 return (0);
237 }
238
239 int
240 rf_ParityLogOverwriteUndoFunc(node)
241 RF_DagNode_t *node;
242 {
243 return (0);
244 }
245 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
246
247 /*****************************************************************************************
248 * the execution function associated with a NOP node
249 ****************************************************************************************/
250 int
251 rf_NullNodeFunc(node)
252 RF_DagNode_t *node;
253 {
254 node->status = rf_good;
255 return (rf_FinishNode(node, RF_THREAD_CONTEXT));
256 }
257
258 int
259 rf_NullNodeUndoFunc(node)
260 RF_DagNode_t *node;
261 {
262 node->status = rf_undone;
263 return (rf_FinishNode(node, RF_THREAD_CONTEXT));
264 }
265
266
267 /*****************************************************************************************
268 * the execution function associated with a disk-read node
269 ****************************************************************************************/
270 int
271 rf_DiskReadFuncForThreads(node)
272 RF_DagNode_t *node;
273 {
274 RF_DiskQueueData_t *req;
275 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
276 caddr_t buf = (caddr_t) node->params[1].p;
277 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
278 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
279 unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
280 unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
281 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
282 RF_DiskQueueDataFlags_t flags = 0;
283 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
284 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
285 void *b_proc = NULL;
286
287 if (node->dagHdr->bp)
288 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
289
290 RF_ASSERT(!(lock && unlock));
291 flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
292 flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
293
294 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
295 buf, parityStripeID, which_ru,
296 (int (*) (void *, int)) node->wakeFunc,
297 node, NULL, node->dagHdr->tracerec,
298 (void *) (node->dagHdr->raidPtr), flags, b_proc);
299 if (!req) {
300 (node->wakeFunc) (node, ENOMEM);
301 } else {
302 node->dagFuncData = (void *) req;
303 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
304 }
305 return (0);
306 }
307
308
309 /*****************************************************************************************
310 * the execution function associated with a disk-write node
311 ****************************************************************************************/
312 int
313 rf_DiskWriteFuncForThreads(node)
314 RF_DagNode_t *node;
315 {
316 RF_DiskQueueData_t *req;
317 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
318 caddr_t buf = (caddr_t) node->params[1].p;
319 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
320 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
321 unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
322 unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
323 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
324 RF_DiskQueueDataFlags_t flags = 0;
325 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
326 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
327 void *b_proc = NULL;
328
329 if (node->dagHdr->bp)
330 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
331
332 /* normal processing (rollaway or forward recovery) begins here */
333 RF_ASSERT(!(lock && unlock));
334 flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
335 flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
336 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
337 buf, parityStripeID, which_ru,
338 (int (*) (void *, int)) node->wakeFunc,
339 (void *) node, NULL,
340 node->dagHdr->tracerec,
341 (void *) (node->dagHdr->raidPtr),
342 flags, b_proc);
343
344 if (!req) {
345 (node->wakeFunc) (node, ENOMEM);
346 } else {
347 node->dagFuncData = (void *) req;
348 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
349 }
350
351 return (0);
352 }
353 /*****************************************************************************************
354 * the undo function for disk nodes
355 * Note: this is not a proper undo of a write node, only locks are released.
356 * old data is not restored to disk!
357 ****************************************************************************************/
358 int
359 rf_DiskUndoFunc(node)
360 RF_DagNode_t *node;
361 {
362 RF_DiskQueueData_t *req;
363 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
364 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
365
366 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
367 0L, 0, NULL, 0L, 0,
368 (int (*) (void *, int)) node->wakeFunc,
369 (void *) node,
370 NULL, node->dagHdr->tracerec,
371 (void *) (node->dagHdr->raidPtr),
372 RF_UNLOCK_DISK_QUEUE, NULL);
373 if (!req)
374 (node->wakeFunc) (node, ENOMEM);
375 else {
376 node->dagFuncData = (void *) req;
377 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY);
378 }
379
380 return (0);
381 }
382 /*****************************************************************************************
383 * the execution function associated with an "unlock disk queue" node
384 ****************************************************************************************/
385 int
386 rf_DiskUnlockFuncForThreads(node)
387 RF_DagNode_t *node;
388 {
389 RF_DiskQueueData_t *req;
390 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
391 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
392
393 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
394 0L, 0, NULL, 0L, 0,
395 (int (*) (void *, int)) node->wakeFunc,
396 (void *) node,
397 NULL, node->dagHdr->tracerec,
398 (void *) (node->dagHdr->raidPtr),
399 RF_UNLOCK_DISK_QUEUE, NULL);
400 if (!req)
401 (node->wakeFunc) (node, ENOMEM);
402 else {
403 node->dagFuncData = (void *) req;
404 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY);
405 }
406
407 return (0);
408 }
409 /*****************************************************************************************
410 * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes,
411 * the routine is called to set the node status and inform the execution engine that
412 * the node has fired.
413 ****************************************************************************************/
414 int
415 rf_GenericWakeupFunc(node, status)
416 RF_DagNode_t *node;
417 int status;
418 {
419 switch (node->status) {
420 case rf_bwd1:
421 node->status = rf_bwd2;
422 if (node->dagFuncData)
423 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
424 return (rf_DiskWriteFuncForThreads(node));
425 break;
426 case rf_fired:
427 if (status)
428 node->status = rf_bad;
429 else
430 node->status = rf_good;
431 break;
432 case rf_recover:
433 /* probably should never reach this case */
434 if (status)
435 node->status = rf_panic;
436 else
437 node->status = rf_undone;
438 break;
439 default:
440 printf("rf_GenericWakeupFunc:");
441 printf("node->status is %d,", node->status);
442 printf("status is %d \n", status);
443 RF_PANIC();
444 break;
445 }
446 if (node->dagFuncData)
447 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
448 return (rf_FinishNode(node, RF_INTR_CONTEXT));
449 }
450
451
452 /*****************************************************************************************
453 * there are three distinct types of xor nodes
454 * A "regular xor" is used in the fault-free case where the access spans a complete
455 * stripe unit. It assumes that the result buffer is one full stripe unit in size,
456 * and uses the stripe-unit-offset values that it computes from the PDAs to determine
457 * where within the stripe unit to XOR each argument buffer.
458 *
459 * A "simple xor" is used in the fault-free case where the access touches only a portion
460 * of one (or two, in some cases) stripe unit(s). It assumes that all the argument
461 * buffers are of the same size and have the same stripe unit offset.
462 *
463 * A "recovery xor" is used in the degraded-mode case. It's similar to the regular
464 * xor function except that it takes the failed PDA as an additional parameter, and
465 * uses it to determine what portions of the argument buffers need to be xor'd into
466 * the result buffer, and where in the result buffer they should go.
467 ****************************************************************************************/
468
469 /* xor the params together and store the result in the result field.
470 * assume the result field points to a buffer that is the size of one SU,
471 * and use the pda params to determine where within the buffer to XOR
472 * the input buffers.
473 */
474 int
475 rf_RegularXorFunc(node)
476 RF_DagNode_t *node;
477 {
478 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
479 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
480 RF_Etimer_t timer;
481 int i, retcode;
482
483 retcode = 0;
484 if (node->dagHdr->status == rf_enable) {
485 /* don't do the XOR if the input is the same as the output */
486 RF_ETIMER_START(timer);
487 for (i = 0; i < node->numParams - 1; i += 2)
488 if (node->params[i + 1].p != node->results[0]) {
489 retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
490 (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp);
491 }
492 RF_ETIMER_STOP(timer);
493 RF_ETIMER_EVAL(timer);
494 tracerec->xor_us += RF_ETIMER_VAL_US(timer);
495 }
496 return (rf_GenericWakeupFunc(node, retcode)); /* call wake func
497 * explicitly since no
498 * I/O in this node */
499 }
500 /* xor the inputs into the result buffer, ignoring placement issues */
501 int
502 rf_SimpleXorFunc(node)
503 RF_DagNode_t *node;
504 {
505 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
506 int i, retcode = 0;
507 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
508 RF_Etimer_t timer;
509
510 if (node->dagHdr->status == rf_enable) {
511 RF_ETIMER_START(timer);
512 /* don't do the XOR if the input is the same as the output */
513 for (i = 0; i < node->numParams - 1; i += 2)
514 if (node->params[i + 1].p != node->results[0]) {
515 retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0],
516 rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector),
517 (struct buf *) node->dagHdr->bp);
518 }
519 RF_ETIMER_STOP(timer);
520 RF_ETIMER_EVAL(timer);
521 tracerec->xor_us += RF_ETIMER_VAL_US(timer);
522 }
523 return (rf_GenericWakeupFunc(node, retcode)); /* call wake func
524 * explicitly since no
525 * I/O in this node */
526 }
527 /* this xor is used by the degraded-mode dag functions to recover lost data.
528 * the second-to-last parameter is the PDA for the failed portion of the access.
529 * the code here looks at this PDA and assumes that the xor target buffer is
530 * equal in size to the number of sectors in the failed PDA. It then uses
531 * the other PDAs in the parameter list to determine where within the target
532 * buffer the corresponding data should be xored.
533 */
534 int
535 rf_RecoveryXorFunc(node)
536 RF_DagNode_t *node;
537 {
538 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
539 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
540 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
541 int i, retcode = 0;
542 RF_PhysDiskAddr_t *pda;
543 int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
544 char *srcbuf, *destbuf;
545 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
546 RF_Etimer_t timer;
547
548 if (node->dagHdr->status == rf_enable) {
549 RF_ETIMER_START(timer);
550 for (i = 0; i < node->numParams - 2; i += 2)
551 if (node->params[i + 1].p != node->results[0]) {
552 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
553 srcbuf = (char *) node->params[i + 1].p;
554 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
555 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
556 retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp);
557 }
558 RF_ETIMER_STOP(timer);
559 RF_ETIMER_EVAL(timer);
560 tracerec->xor_us += RF_ETIMER_VAL_US(timer);
561 }
562 return (rf_GenericWakeupFunc(node, retcode));
563 }
564 /*****************************************************************************************
565 * The next three functions are utilities used by the above xor-execution functions.
566 ****************************************************************************************/
567
568
569 /*
570 * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit
571 * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the
572 * access described by pda is one SU in size (which by implication means it's SU-aligned),
573 * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one
574 * SU in size the XOR occurs on only the portion of targbuf identified in the pda.
575 */
576
577 int
578 rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp)
579 RF_Raid_t *raidPtr;
580 RF_PhysDiskAddr_t *pda;
581 char *srcbuf;
582 char *targbuf;
583 void *bp;
584 {
585 char *targptr;
586 int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
587 int SUOffset = pda->startSector % sectPerSU;
588 int length, retcode = 0;
589
590 RF_ASSERT(pda->numSector <= sectPerSU);
591
592 targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
593 length = rf_RaidAddressToByte(raidPtr, pda->numSector);
594 retcode = rf_bxor(srcbuf, targptr, length, bp);
595 return (retcode);
596 }
597 /* it really should be the case that the buffer pointers (returned by malloc)
598 * are aligned to the natural word size of the machine, so this is the only
599 * case we optimize for. The length should always be a multiple of the sector
600 * size, so there should be no problem with leftover bytes at the end.
601 */
602 int
603 rf_bxor(src, dest, len, bp)
604 char *src;
605 char *dest;
606 int len;
607 void *bp;
608 {
609 unsigned mask = sizeof(long) - 1, retcode = 0;
610
611 if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) {
612 retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp);
613 } else {
614 RF_ASSERT(0);
615 }
616 return (retcode);
617 }
618 /* map a user buffer into kernel space, if necessary */
619 #define REMAP_VA(_bp,x,y) (y) = (x)
620
621 /* When XORing in kernel mode, we need to map each user page to kernel space before we can access it.
622 * We don't want to assume anything about which input buffers are in kernel/user
623 * space, nor about their alignment, so in each loop we compute the maximum number
624 * of bytes that we can xor without crossing any page boundaries, and do only this many
625 * bytes before the next remap.
626 */
627 int
628 rf_longword_bxor(src, dest, len, bp)
629 unsigned long *src;
630 unsigned long *dest;
631 int len; /* longwords */
632 void *bp;
633 {
634 unsigned long *end = src + len;
635 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */
636 unsigned long *pg_src, *pg_dest; /* per-page source/dest
637 * pointers */
638 int longs_this_time;/* # longwords to xor in the current iteration */
639
640 REMAP_VA(bp, src, pg_src);
641 REMAP_VA(bp, dest, pg_dest);
642 if (!pg_src || !pg_dest)
643 return (EFAULT);
644
645 while (len >= 4) {
646 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */
647 src += longs_this_time;
648 dest += longs_this_time;
649 len -= longs_this_time;
650 while (longs_this_time >= 4) {
651 d0 = pg_dest[0];
652 d1 = pg_dest[1];
653 d2 = pg_dest[2];
654 d3 = pg_dest[3];
655 s0 = pg_src[0];
656 s1 = pg_src[1];
657 s2 = pg_src[2];
658 s3 = pg_src[3];
659 pg_dest[0] = d0 ^ s0;
660 pg_dest[1] = d1 ^ s1;
661 pg_dest[2] = d2 ^ s2;
662 pg_dest[3] = d3 ^ s3;
663 pg_src += 4;
664 pg_dest += 4;
665 longs_this_time -= 4;
666 }
667 while (longs_this_time > 0) { /* cannot cross any page
668 * boundaries here */
669 *pg_dest++ ^= *pg_src++;
670 longs_this_time--;
671 }
672
673 /* either we're done, or we've reached a page boundary on one
674 * (or possibly both) of the pointers */
675 if (len) {
676 if (RF_PAGE_ALIGNED(src))
677 REMAP_VA(bp, src, pg_src);
678 if (RF_PAGE_ALIGNED(dest))
679 REMAP_VA(bp, dest, pg_dest);
680 if (!pg_src || !pg_dest)
681 return (EFAULT);
682 }
683 }
684 while (src < end) {
685 *pg_dest++ ^= *pg_src++;
686 src++;
687 dest++;
688 len--;
689 if (RF_PAGE_ALIGNED(src))
690 REMAP_VA(bp, src, pg_src);
691 if (RF_PAGE_ALIGNED(dest))
692 REMAP_VA(bp, dest, pg_dest);
693 }
694 RF_ASSERT(len == 0);
695 return (0);
696 }
697
698 #if 0
699 /*
700 dst = a ^ b ^ c;
701 a may equal dst
702 see comment above longword_bxor
703 */
704 int
705 rf_longword_bxor3(dst, a, b, c, len, bp)
706 unsigned long *dst;
707 unsigned long *a;
708 unsigned long *b;
709 unsigned long *c;
710 int len; /* length in longwords */
711 void *bp;
712 {
713 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
714 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest
715 * pointers */
716 int longs_this_time;/* # longs to xor in the current iteration */
717 char dst_is_a = 0;
718
719 REMAP_VA(bp, a, pg_a);
720 REMAP_VA(bp, b, pg_b);
721 REMAP_VA(bp, c, pg_c);
722 if (a == dst) {
723 pg_dst = pg_a;
724 dst_is_a = 1;
725 } else {
726 REMAP_VA(bp, dst, pg_dst);
727 }
728
729 /* align dest to cache line. Can't cross a pg boundary on dst here. */
730 while ((((unsigned long) pg_dst) & 0x1f)) {
731 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
732 dst++;
733 a++;
734 b++;
735 c++;
736 if (RF_PAGE_ALIGNED(a)) {
737 REMAP_VA(bp, a, pg_a);
738 if (!pg_a)
739 return (EFAULT);
740 }
741 if (RF_PAGE_ALIGNED(b)) {
742 REMAP_VA(bp, a, pg_b);
743 if (!pg_b)
744 return (EFAULT);
745 }
746 if (RF_PAGE_ALIGNED(c)) {
747 REMAP_VA(bp, a, pg_c);
748 if (!pg_c)
749 return (EFAULT);
750 }
751 len--;
752 }
753
754 while (len > 4) {
755 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT);
756 a += longs_this_time;
757 b += longs_this_time;
758 c += longs_this_time;
759 dst += longs_this_time;
760 len -= longs_this_time;
761 while (longs_this_time >= 4) {
762 a0 = pg_a[0];
763 longs_this_time -= 4;
764
765 a1 = pg_a[1];
766 a2 = pg_a[2];
767
768 a3 = pg_a[3];
769 pg_a += 4;
770
771 b0 = pg_b[0];
772 b1 = pg_b[1];
773
774 b2 = pg_b[2];
775 b3 = pg_b[3];
776 /* start dual issue */
777 a0 ^= b0;
778 b0 = pg_c[0];
779
780 pg_b += 4;
781 a1 ^= b1;
782
783 a2 ^= b2;
784 a3 ^= b3;
785
786 b1 = pg_c[1];
787 a0 ^= b0;
788
789 b2 = pg_c[2];
790 a1 ^= b1;
791
792 b3 = pg_c[3];
793 a2 ^= b2;
794
795 pg_dst[0] = a0;
796 a3 ^= b3;
797 pg_dst[1] = a1;
798 pg_c += 4;
799 pg_dst[2] = a2;
800 pg_dst[3] = a3;
801 pg_dst += 4;
802 }
803 while (longs_this_time > 0) { /* cannot cross any page
804 * boundaries here */
805 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
806 longs_this_time--;
807 }
808
809 if (len) {
810 if (RF_PAGE_ALIGNED(a)) {
811 REMAP_VA(bp, a, pg_a);
812 if (!pg_a)
813 return (EFAULT);
814 if (dst_is_a)
815 pg_dst = pg_a;
816 }
817 if (RF_PAGE_ALIGNED(b)) {
818 REMAP_VA(bp, b, pg_b);
819 if (!pg_b)
820 return (EFAULT);
821 }
822 if (RF_PAGE_ALIGNED(c)) {
823 REMAP_VA(bp, c, pg_c);
824 if (!pg_c)
825 return (EFAULT);
826 }
827 if (!dst_is_a)
828 if (RF_PAGE_ALIGNED(dst)) {
829 REMAP_VA(bp, dst, pg_dst);
830 if (!pg_dst)
831 return (EFAULT);
832 }
833 }
834 }
835 while (len) {
836 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
837 dst++;
838 a++;
839 b++;
840 c++;
841 if (RF_PAGE_ALIGNED(a)) {
842 REMAP_VA(bp, a, pg_a);
843 if (!pg_a)
844 return (EFAULT);
845 if (dst_is_a)
846 pg_dst = pg_a;
847 }
848 if (RF_PAGE_ALIGNED(b)) {
849 REMAP_VA(bp, b, pg_b);
850 if (!pg_b)
851 return (EFAULT);
852 }
853 if (RF_PAGE_ALIGNED(c)) {
854 REMAP_VA(bp, c, pg_c);
855 if (!pg_c)
856 return (EFAULT);
857 }
858 if (!dst_is_a)
859 if (RF_PAGE_ALIGNED(dst)) {
860 REMAP_VA(bp, dst, pg_dst);
861 if (!pg_dst)
862 return (EFAULT);
863 }
864 len--;
865 }
866 return (0);
867 }
868
869 int
870 rf_bxor3(dst, a, b, c, len, bp)
871 unsigned char *dst;
872 unsigned char *a;
873 unsigned char *b;
874 unsigned char *c;
875 unsigned long len;
876 void *bp;
877 {
878 RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0);
879
880 return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
881 (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp));
882 }
883 #endif
884