rf_dagfuncs.c revision 1.13 1 /* $NetBSD: rf_dagfuncs.c,v 1.13 2003/12/29 02:38:17 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * dagfuncs.c -- DAG node execution routines
31 *
32 * Rules:
33 * 1. Every DAG execution function must eventually cause node->status to
34 * get set to "good" or "bad", and "FinishNode" to be called. In the
35 * case of nodes that complete immediately (xor, NullNodeFunc, etc),
36 * the node execution function can do these two things directly. In
37 * the case of nodes that have to wait for some event (a disk read to
38 * complete, a lock to be released, etc) to occur before they can
39 * complete, this is typically achieved by having whatever module
40 * is doing the operation call GenericWakeupFunc upon completion.
41 * 2. DAG execution functions should check the status in the DAG header
42 * and NOP out their operations if the status is not "enable". However,
43 * execution functions that release resources must be sure to release
44 * them even when they NOP out the function that would use them.
45 * Functions that acquire resources should go ahead and acquire them
46 * even when they NOP, so that a downstream release node will not have
47 * to check to find out whether or not the acquire was suppressed.
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.13 2003/12/29 02:38:17 oster Exp $");
52
53 #include <sys/param.h>
54 #include <sys/ioctl.h>
55
56 #include "rf_archs.h"
57 #include "rf_raid.h"
58 #include "rf_dag.h"
59 #include "rf_layout.h"
60 #include "rf_etimer.h"
61 #include "rf_acctrace.h"
62 #include "rf_diskqueue.h"
63 #include "rf_dagfuncs.h"
64 #include "rf_general.h"
65 #include "rf_engine.h"
66 #include "rf_dagutils.h"
67
68 #include "rf_kintf.h"
69
70 #if RF_INCLUDE_PARITYLOGGING > 0
71 #include "rf_paritylog.h"
72 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
73
74 int (*rf_DiskReadFunc) (RF_DagNode_t *);
75 int (*rf_DiskWriteFunc) (RF_DagNode_t *);
76 int (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
77 int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
78 int (*rf_DiskUnlockFunc) (RF_DagNode_t *);
79 int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
80 int (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
81 int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
82 int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);
83
84 /*****************************************************************************************
85 * main (only) configuration routine for this module
86 ****************************************************************************************/
87 int
88 rf_ConfigureDAGFuncs(listp)
89 RF_ShutdownList_t **listp;
90 {
91 RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
92 rf_DiskReadFunc = rf_DiskReadFuncForThreads;
93 rf_DiskReadUndoFunc = rf_DiskUndoFunc;
94 rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
95 rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
96 rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
97 rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
98 rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
99 rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
100 rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
101 return (0);
102 }
103
104
105
106 /*****************************************************************************************
107 * the execution function associated with a terminate node
108 ****************************************************************************************/
/*
 * Execution function for a terminate node.  By the time this fires,
 * every commit node in the DAG must already have fired (asserted
 * below).  The node completes immediately, so per rule 1 above it sets
 * its own status and calls rf_FinishNode directly.
 */
int
rf_TerminateFunc(node)
	RF_DagNode_t *node;
{
	/* all commit nodes must have fired before the DAG may terminate */
	RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
	node->status = rf_good;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}
117
118 int
119 rf_TerminateUndoFunc(node)
120 RF_DagNode_t *node;
121 {
122 return (0);
123 }
124
125
126 /*****************************************************************************************
127 * execution functions associated with a mirror node
128 *
129 * parameters:
130 *
131 * 0 - physical disk addres of data
132 * 1 - buffer for holding read data
133 * 2 - parity stripe ID
134 * 3 - flags
135 * 4 - physical disk address of mirror (parity)
136 *
137 ****************************************************************************************/
138
/*
 * Mirror-read node, "idle disk" policy: pick whichever copy to read
 * and then behave exactly like an ordinary disk-read node.
 */
int
rf_DiskReadMirrorIdleFunc(node)
	RF_DagNode_t *node;
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address */

	rf_SelectMirrorDiskIdle(node);
	/* hand off to the regular disk-read execution function */
	return (rf_DiskReadFunc(node));
}
149
150 #if (RF_INCLUDE_CHAINDECLUSTER > 0) || (RF_INCLUDE_INTERDECLUSTER > 0) || (RF_DEBUG_VALIDATE_DAG > 0)
/*
 * Mirror-read node, "partition" policy: same as the idle variant above
 * but uses the partition-based mirror selection routine.
 */
int
rf_DiskReadMirrorPartitionFunc(node)
	RF_DagNode_t *node;
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address */

	rf_SelectMirrorDiskPartition(node);
	/* hand off to the regular disk-read execution function */
	return (rf_DiskReadFunc(node));
}
161 #endif
162
163 int
164 rf_DiskReadMirrorUndoFunc(node)
165 RF_DagNode_t *node;
166 {
167 return (0);
168 }
169
170
171
172 #if RF_INCLUDE_PARITYLOGGING > 0
173 /*****************************************************************************************
174 * the execution function associated with a parity log update node
175 ****************************************************************************************/
/*
 * Execution function for a parity-log update node.  Packages the data
 * described by params[0] (PDA) and params[1] (buffer) into a parity
 * log record and appends it to the log.  Completion is signalled
 * asynchronously through node->wakeFunc once the append finishes; if
 * the log record cannot be allocated the node is woken immediately
 * with ENOMEM.
 */
int
rf_ParityLogUpdateFunc(node)
	RF_DagNode_t *node;
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	/* NOP the operation unless the DAG is enabled (rule 2 above) */
	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
		    (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node,
		    node->dagHdr->tracerec, timer);
		if (logData)
			/* wakeFunc fires when the append completes */
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* allocation failed: charge the elapsed time to the
			 * parity-log trace bucket and fail the node */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}
203
204
205 /*****************************************************************************************
206 * the execution function associated with a parity log overwrite node
207 ****************************************************************************************/
/*
 * Execution function for a parity-log overwrite node.  Identical in
 * structure to rf_ParityLogUpdateFunc above, but creates an
 * RF_OVERWRITE record instead of an RF_UPDATE record.  Completion is
 * signalled asynchronously through node->wakeFunc; on allocation
 * failure the node is woken immediately with ENOMEM.
 */
int
rf_ParityLogOverwriteFunc(node)
	RF_DagNode_t *node;
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	/* NOP the operation unless the DAG is enabled (rule 2 above) */
	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer);
		if (logData)
			/* wakeFunc fires when the append completes */
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* allocation failed: charge the elapsed time to the
			 * parity-log trace bucket and fail the node */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}
233
234 int
235 rf_ParityLogUpdateUndoFunc(node)
236 RF_DagNode_t *node;
237 {
238 return (0);
239 }
240
241 int
242 rf_ParityLogOverwriteUndoFunc(node)
243 RF_DagNode_t *node;
244 {
245 return (0);
246 }
247 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
248
249 /*****************************************************************************************
250 * the execution function associated with a NOP node
251 ****************************************************************************************/
252 int
253 rf_NullNodeFunc(node)
254 RF_DagNode_t *node;
255 {
256 node->status = rf_good;
257 return (rf_FinishNode(node, RF_THREAD_CONTEXT));
258 }
259
260 int
261 rf_NullNodeUndoFunc(node)
262 RF_DagNode_t *node;
263 {
264 node->status = rf_undone;
265 return (rf_FinishNode(node, RF_THREAD_CONTEXT));
266 }
267
268
269 /*****************************************************************************************
270 * the execution function associated with a disk-read node
271 ****************************************************************************************/
/*
 * Execution function for a disk-read node.  Unpacks the node
 * parameters (0 = PDA, 1 = buffer, 2 = parity stripe ID, 3 = packed
 * priority/lock/unlock/RU fields), builds a disk queue request and
 * enqueues it on the target disk's queue.  Completion is signalled
 * asynchronously through node->wakeFunc.  If the DAG is not enabled
 * the request is issued as a NOP I/O, but it is still enqueued so that
 * any lock/unlock flags take effect (rule 2 above).
 */
int
rf_DiskReadFuncForThreads(node)
	RF_DagNode_t *node;
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	/* read for real only when the DAG is enabled; NOP otherwise */
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
	RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void *b_proc = NULL;

	/* propagate the originating process, if the DAG carries a buf */
	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* a single request may lock or unlock the queue, never both */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;

	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    node, NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr), flags, b_proc);
	if (!req) {
		/* no request structure available: fail the node now */
		(node->wakeFunc) (node, ENOMEM);
	} else {
		/* remember the request so the wakeup func can free it */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority);
	}
	return (0);
}
309
310
311 /*****************************************************************************************
312 * the execution function associated with a disk-write node
313 ****************************************************************************************/
/*
 * Execution function for a disk-write node.  Mirrors the disk-read
 * function above: unpacks the node parameters (0 = PDA, 1 = buffer,
 * 2 = parity stripe ID, 3 = packed priority/lock/unlock/RU fields),
 * builds a disk queue request and enqueues it.  Completion is
 * signalled asynchronously through node->wakeFunc.  A disabled DAG
 * turns the I/O into a NOP but the request is still enqueued so any
 * lock/unlock flags take effect (rule 2 above).
 */
int
rf_DiskWriteFuncForThreads(node)
	RF_DagNode_t *node;
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	/* write for real only when the DAG is enabled; NOP otherwise */
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
	RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void *b_proc = NULL;

	/* propagate the originating process, if the DAG carries a buf */
	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* normal processing (rollaway or forward recovery) begins here */
	/* a single request may lock or unlock the queue, never both */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    (void *) node, NULL,
	    node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr),
	    flags, b_proc);

	if (!req) {
		/* no request structure available: fail the node now */
		(node->wakeFunc) (node, ENOMEM);
	} else {
		/* remember the request so the wakeup func can free it */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority);
	}

	return (0);
}
355 /*****************************************************************************************
356 * the undo function for disk nodes
357 * Note: this is not a proper undo of a write node, only locks are released.
358 * old data is not restored to disk!
359 ****************************************************************************************/
int
rf_DiskUndoFunc(node)
	RF_DagNode_t *node;
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;

	/* enqueue a NOP request whose only effect is the
	 * RF_UNLOCK_DISK_QUEUE flag: the data on disk is left as-is,
	 * only the queue lock is released (see banner comment above) */
	req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
	    0L, 0, NULL, 0L, 0,
	    (int (*) (void *, int)) node->wakeFunc,
	    (void *) node,
	    NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr),
	    RF_UNLOCK_DISK_QUEUE, NULL);
	if (!req)
		/* no request structure available: fail the node now */
		(node->wakeFunc) (node, ENOMEM);
	else {
		/* remember the request so the wakeup func can free it */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY);
	}

	return (0);
}
384 /*****************************************************************************************
385 * the execution function associated with an "unlock disk queue" node
386 ****************************************************************************************/
int
rf_DiskUnlockFuncForThreads(node)
	RF_DagNode_t *node;
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;

	/* enqueue a NOP request carrying RF_UNLOCK_DISK_QUEUE: its only
	 * effect is to release the lock on the target disk's queue
	 * (same mechanism as rf_DiskUndoFunc above) */
	req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
	    0L, 0, NULL, 0L, 0,
	    (int (*) (void *, int)) node->wakeFunc,
	    (void *) node,
	    NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr),
	    RF_UNLOCK_DISK_QUEUE, NULL);
	if (!req)
		/* no request structure available: fail the node now */
		(node->wakeFunc) (node, ENOMEM);
	else {
		/* remember the request so the wakeup func can free it */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY);
	}

	return (0);
}
411 /*****************************************************************************************
412 * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes,
413 * the routine is called to set the node status and inform the execution engine that
414 * the node has fired.
415 ****************************************************************************************/
/*
 * Callback for DiskRead/DiskWrite nodes, invoked when the disk op
 * completes.  'status' is 0 on success, nonzero on failure.  The
 * node's current status selects the transition:
 *   rf_bwd1    -> rf_bwd2, and the node is re-issued as a write
 *                 (second pass of backward recovery)
 *   rf_fired   -> rf_good / rf_bad depending on 'status'
 *   rf_recover -> rf_undone / rf_panic depending on 'status'
 * Any other state is a bug and panics.  The disk queue request stored
 * in node->dagFuncData is freed before the node is finished.
 */
int
rf_GenericWakeupFunc(node, status)
	RF_DagNode_t *node;
	int status;
{
	switch (node->status) {
	case rf_bwd1:
		/* first pass of a backward-recovery write done: free the
		 * old request and fire the node again as a write */
		node->status = rf_bwd2;
		if (node->dagFuncData)
			rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
		return (rf_DiskWriteFuncForThreads(node));
	case rf_fired:
		/* normal forward execution completed */
		if (status)
			node->status = rf_bad;
		else
			node->status = rf_good;
		break;
	case rf_recover:
		/* probably should never reach this case */
		if (status)
			node->status = rf_panic;
		else
			node->status = rf_undone;
		break;
	default:
		/* a completion in any other state is a driver bug */
		printf("rf_GenericWakeupFunc:");
		printf("node->status is %d,", node->status);
		printf("status is %d \n", status);
		RF_PANIC();
		break;
	}
	if (node->dagFuncData)
		rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
	/* we are running in the disk interrupt path here */
	return (rf_FinishNode(node, RF_INTR_CONTEXT));
}
451
452
453 /*****************************************************************************************
454 * there are three distinct types of xor nodes
455 * A "regular xor" is used in the fault-free case where the access spans a complete
456 * stripe unit. It assumes that the result buffer is one full stripe unit in size,
457 * and uses the stripe-unit-offset values that it computes from the PDAs to determine
458 * where within the stripe unit to XOR each argument buffer.
459 *
460 * A "simple xor" is used in the fault-free case where the access touches only a portion
461 * of one (or two, in some cases) stripe unit(s). It assumes that all the argument
462 * buffers are of the same size and have the same stripe unit offset.
463 *
464 * A "recovery xor" is used in the degraded-mode case. It's similar to the regular
465 * xor function except that it takes the failed PDA as an additional parameter, and
466 * uses it to determine what portions of the argument buffers need to be xor'd into
467 * the result buffer, and where in the result buffer they should go.
468 ****************************************************************************************/
469
470 /* xor the params together and store the result in the result field.
471 * assume the result field points to a buffer that is the size of one SU,
472 * and use the pda params to determine where within the buffer to XOR
473 * the input buffers.
474 */
int
rf_RegularXorFunc(node)
	RF_DagNode_t *node;
{
	/* the raid pointer is always the last parameter of an xor node */
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	int i, retcode;

	retcode = 0;
	/* NOP the operation unless the DAG is enabled (rule 2 above) */
	if (node->dagHdr->status == rf_enable) {
		/* don't do the XOR if the input is the same as the output */
		RF_ETIMER_START(timer);
		/* params come in (pda, buffer) pairs; walk them pairwise */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				/* NOTE: only the last failing xor's code is kept */
				retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
				    (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		/* charge the elapsed time to the xor trace bucket */
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
501 /* xor the inputs into the result buffer, ignoring placement issues */
/* xor the inputs into the result buffer, ignoring placement issues */
int
rf_SimpleXorFunc(node)
	RF_DagNode_t *node;
{
	/* the raid pointer is always the last parameter of an xor node */
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	int i, retcode = 0;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	/* NOP the operation unless the DAG is enabled (rule 2 above) */
	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* don't do the XOR if the input is the same as the output */
		/* params come in (pda, buffer) pairs; walk them pairwise */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				/* xor the whole buffer at offset 0: all inputs
				 * share the same stripe unit offset here */
				retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0],
				    rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector),
				    (struct buf *) node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		/* charge the elapsed time to the xor trace bucket */
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
528 /* this xor is used by the degraded-mode dag functions to recover lost data.
529 * the second-to-last parameter is the PDA for the failed portion of the access.
530 * the code here looks at this PDA and assumes that the xor target buffer is
531 * equal in size to the number of sectors in the failed PDA. It then uses
532 * the other PDAs in the parameter list to determine where within the target
533 * buffer the corresponding data should be xored.
534 */
int
rf_RecoveryXorFunc(node)
	RF_DagNode_t *node;
{
	/* last param is the raid pointer, second-to-last the failed PDA */
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	int i, retcode = 0;
	RF_PhysDiskAddr_t *pda;
	int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	char *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	/* NOP the operation unless the DAG is enabled (rule 2 above) */
	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* walk the (pda, buffer) pairs, excluding the trailing
		 * failedPDA and raidPtr parameters */
		for (i = 0; i < node->numParams - 2; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
				srcbuf = (char *) node->params[i + 1].p;
				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
				/* place this contribution in the target buffer
				 * relative to the failed region's offset */
				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
				retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		/* charge the elapsed time to the xor trace bucket */
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	/* no I/O in this node, so wake it explicitly */
	return (rf_GenericWakeupFunc(node, retcode));
}
565 /*****************************************************************************************
566 * The next three functions are utilities used by the above xor-execution functions.
567 ****************************************************************************************/
568
569
570 /*
571 * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit
572 * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the
573 * access described by pda is one SU in size (which by implication means it's SU-aligned),
574 * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one
575 * SU in size the XOR occurs on only the portion of targbuf identified in the pda.
576 */
577
int
rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp)
	RF_Raid_t *raidPtr;
	RF_PhysDiskAddr_t *pda;	/* describes the region covered by srcbuf */
	char *srcbuf;		/* at most one stripe unit of data */
	char *targbuf;		/* one full stripe unit in size */
	void *bp;
{
	char *targptr;
	int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	/* offset of the access within its stripe unit, in sectors */
	int SUOffset = pda->startSector % sectPerSU;
	int length, retcode = 0;

	/* srcbuf may never exceed one stripe unit */
	RF_ASSERT(pda->numSector <= sectPerSU);

	/* xor srcbuf into the portion of targbuf the pda identifies */
	targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
	length = rf_RaidAddressToByte(raidPtr, pda->numSector);
	retcode = rf_bxor(srcbuf, targptr, length, bp);
	return (retcode);
}
598 /* it really should be the case that the buffer pointers (returned by malloc)
599 * are aligned to the natural word size of the machine, so this is the only
600 * case we optimize for. The length should always be a multiple of the sector
601 * size, so there should be no problem with leftover bytes at the end.
602 */
603 int
604 rf_bxor(src, dest, len, bp)
605 char *src;
606 char *dest;
607 int len;
608 void *bp;
609 {
610 unsigned mask = sizeof(long) - 1, retcode = 0;
611
612 if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) {
613 retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp);
614 } else {
615 RF_ASSERT(0);
616 }
617 return (retcode);
618 }
619 /* map a user buffer into kernel space, if necessary */
620 #define REMAP_VA(_bp,x,y) (y) = (x)
621
622 /* When XORing in kernel mode, we need to map each user page to kernel space before we can access it.
623 * We don't want to assume anything about which input buffers are in kernel/user
624 * space, nor about their alignment, so in each loop we compute the maximum number
625 * of bytes that we can xor without crossing any page boundaries, and do only this many
626 * bytes before the next remap.
627 */
int
rf_longword_bxor(src, dest, len, bp)
	unsigned long *src;
	unsigned long *dest;
	int len;		/* longwords */
	void *bp;
{
	unsigned long *end = src + len;
	unsigned long d0, d1, d2, d3, s0, s1, s2, s3;	/* temps */
	unsigned long *pg_src, *pg_dest;	/* per-page source/dest
						 * pointers */
	int longs_this_time;/* # longwords to xor in the current iteration */

	/* REMAP_VA is the identity mapping in this build (see macro
	 * above); pg_src/pg_dest track src/dest page by page */
	REMAP_VA(bp, src, pg_src);
	REMAP_VA(bp, dest, pg_dest);
	if (!pg_src || !pg_dest)
		return (EFAULT);

	while (len >= 4) {
		/* xor only up to the nearest page boundary of either
		 * pointer, so each remap stays valid for the chunk */
		longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT);	/* note len in longwords */
		/* advance the virtual pointers past the chunk now; the
		 * actual work below uses the page pointers */
		src += longs_this_time;
		dest += longs_this_time;
		len -= longs_this_time;
		/* unrolled by 4 for speed */
		while (longs_this_time >= 4) {
			d0 = pg_dest[0];
			d1 = pg_dest[1];
			d2 = pg_dest[2];
			d3 = pg_dest[3];
			s0 = pg_src[0];
			s1 = pg_src[1];
			s2 = pg_src[2];
			s3 = pg_src[3];
			pg_dest[0] = d0 ^ s0;
			pg_dest[1] = d1 ^ s1;
			pg_dest[2] = d2 ^ s2;
			pg_dest[3] = d3 ^ s3;
			pg_src += 4;
			pg_dest += 4;
			longs_this_time -= 4;
		}
		while (longs_this_time > 0) {	/* cannot cross any page
						 * boundaries here */
			*pg_dest++ ^= *pg_src++;
			longs_this_time--;
		}

		/* either we're done, or we've reached a page boundary on one
		 * (or possibly both) of the pointers */
		if (len) {
			if (RF_PAGE_ALIGNED(src))
				REMAP_VA(bp, src, pg_src);
			if (RF_PAGE_ALIGNED(dest))
				REMAP_VA(bp, dest, pg_dest);
			if (!pg_src || !pg_dest)
				return (EFAULT);
		}
	}
	/* tail: fewer than 4 longwords remain; xor them one at a time,
	 * remapping whenever a pointer crosses onto a new page */
	while (src < end) {
		*pg_dest++ ^= *pg_src++;
		src++;
		dest++;
		len--;
		if (RF_PAGE_ALIGNED(src))
			REMAP_VA(bp, src, pg_src);
		if (RF_PAGE_ALIGNED(dest))
			REMAP_VA(bp, dest, pg_dest);
	}
	RF_ASSERT(len == 0);
	return (0);
}
698
699 #if 0
700 /*
701 dst = a ^ b ^ c;
702 a may equal dst
703 see comment above longword_bxor
704 */
705 int
706 rf_longword_bxor3(dst, a, b, c, len, bp)
707 unsigned long *dst;
708 unsigned long *a;
709 unsigned long *b;
710 unsigned long *c;
711 int len; /* length in longwords */
712 void *bp;
713 {
714 unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
715 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest
716 * pointers */
717 int longs_this_time;/* # longs to xor in the current iteration */
718 char dst_is_a = 0;
719
720 REMAP_VA(bp, a, pg_a);
721 REMAP_VA(bp, b, pg_b);
722 REMAP_VA(bp, c, pg_c);
723 if (a == dst) {
724 pg_dst = pg_a;
725 dst_is_a = 1;
726 } else {
727 REMAP_VA(bp, dst, pg_dst);
728 }
729
730 /* align dest to cache line. Can't cross a pg boundary on dst here. */
731 while ((((unsigned long) pg_dst) & 0x1f)) {
732 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
733 dst++;
734 a++;
735 b++;
736 c++;
737 if (RF_PAGE_ALIGNED(a)) {
738 REMAP_VA(bp, a, pg_a);
739 if (!pg_a)
740 return (EFAULT);
741 }
742 if (RF_PAGE_ALIGNED(b)) {
743 REMAP_VA(bp, a, pg_b);
744 if (!pg_b)
745 return (EFAULT);
746 }
747 if (RF_PAGE_ALIGNED(c)) {
748 REMAP_VA(bp, a, pg_c);
749 if (!pg_c)
750 return (EFAULT);
751 }
752 len--;
753 }
754
755 while (len > 4) {
756 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT);
757 a += longs_this_time;
758 b += longs_this_time;
759 c += longs_this_time;
760 dst += longs_this_time;
761 len -= longs_this_time;
762 while (longs_this_time >= 4) {
763 a0 = pg_a[0];
764 longs_this_time -= 4;
765
766 a1 = pg_a[1];
767 a2 = pg_a[2];
768
769 a3 = pg_a[3];
770 pg_a += 4;
771
772 b0 = pg_b[0];
773 b1 = pg_b[1];
774
775 b2 = pg_b[2];
776 b3 = pg_b[3];
777 /* start dual issue */
778 a0 ^= b0;
779 b0 = pg_c[0];
780
781 pg_b += 4;
782 a1 ^= b1;
783
784 a2 ^= b2;
785 a3 ^= b3;
786
787 b1 = pg_c[1];
788 a0 ^= b0;
789
790 b2 = pg_c[2];
791 a1 ^= b1;
792
793 b3 = pg_c[3];
794 a2 ^= b2;
795
796 pg_dst[0] = a0;
797 a3 ^= b3;
798 pg_dst[1] = a1;
799 pg_c += 4;
800 pg_dst[2] = a2;
801 pg_dst[3] = a3;
802 pg_dst += 4;
803 }
804 while (longs_this_time > 0) { /* cannot cross any page
805 * boundaries here */
806 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
807 longs_this_time--;
808 }
809
810 if (len) {
811 if (RF_PAGE_ALIGNED(a)) {
812 REMAP_VA(bp, a, pg_a);
813 if (!pg_a)
814 return (EFAULT);
815 if (dst_is_a)
816 pg_dst = pg_a;
817 }
818 if (RF_PAGE_ALIGNED(b)) {
819 REMAP_VA(bp, b, pg_b);
820 if (!pg_b)
821 return (EFAULT);
822 }
823 if (RF_PAGE_ALIGNED(c)) {
824 REMAP_VA(bp, c, pg_c);
825 if (!pg_c)
826 return (EFAULT);
827 }
828 if (!dst_is_a)
829 if (RF_PAGE_ALIGNED(dst)) {
830 REMAP_VA(bp, dst, pg_dst);
831 if (!pg_dst)
832 return (EFAULT);
833 }
834 }
835 }
836 while (len) {
837 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
838 dst++;
839 a++;
840 b++;
841 c++;
842 if (RF_PAGE_ALIGNED(a)) {
843 REMAP_VA(bp, a, pg_a);
844 if (!pg_a)
845 return (EFAULT);
846 if (dst_is_a)
847 pg_dst = pg_a;
848 }
849 if (RF_PAGE_ALIGNED(b)) {
850 REMAP_VA(bp, b, pg_b);
851 if (!pg_b)
852 return (EFAULT);
853 }
854 if (RF_PAGE_ALIGNED(c)) {
855 REMAP_VA(bp, c, pg_c);
856 if (!pg_c)
857 return (EFAULT);
858 }
859 if (!dst_is_a)
860 if (RF_PAGE_ALIGNED(dst)) {
861 REMAP_VA(bp, dst, pg_dst);
862 if (!pg_dst)
863 return (EFAULT);
864 }
865 len--;
866 }
867 return (0);
868 }
869
/*
 * Byte-count wrapper around rf_longword_bxor3: asserts that all four
 * buffers and the length are 8-byte aligned, then dispatches with the
 * length converted to longwords.
 * NOTE(review): the hard-coded 0x7 mask assumes 8-byte longs; on a
 * machine where RF_LONGSHIFT == 2 (4-byte longs, see the assert in
 * rf_ConfigureDAGFuncs) this over-constrains alignment — confirm if
 * this #if 0 code is ever revived.
 */
int
rf_bxor3(dst, a, b, c, len, bp)
	unsigned char *dst;
	unsigned char *a;
	unsigned char *b;
	unsigned char *c;
	unsigned long len;	/* length in bytes */
	void *bp;
{
	RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0);

	return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
	    (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp));
}
884 #endif
885