1 /*	$NetBSD: rf_dagffwr.c,v 1.21 2004/03/06 23:52:20 oster Exp $	*/
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30  * rf_dagffwr.c
31  *
32  * code for creating fault-free write DAGs
33 *
34 */
35
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.21 2004/03/06 23:52:20 oster Exp $");
38
39 #include <dev/raidframe/raidframevar.h>
40
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_debugMem.h"
46 #include "rf_dagffrd.h"
47 #include "rf_general.h"
48 #include "rf_dagffwr.h"
49
50 /******************************************************************************
51 *
52 * General comments on DAG creation:
53 *
54 * All DAGs in this file use roll-away error recovery. Each DAG has a single
55 * commit node, usually called "Cmt." If an error occurs before the Cmt node
56 * is reached, the execution engine will halt forward execution and work
57  * backward through the graph, executing the undo functions. Assuming that
58  * each node in the graph prior to the Cmt node is either undoable and atomic,
59  * or makes no changes to permanent state, the graph will fail atomically.
60 * If an error occurs after the Cmt node executes, the engine will roll-forward
61 * through the graph, blindly executing nodes until it reaches the end.
62 * If a graph reaches the end, it is assumed to have completed successfully.
63 *
64 * A graph has only 1 Cmt node.
65 *
66 */
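/*
 * For example, in the large-write graph created below, the Rod reads and
 * the XOR computation all execute before the Cmt node: if any of them
 * fails, the engine rolls backward and the array is left unchanged.  The
 * Wnd/Wnp (and optional Wnq) writes execute after the Cmt node, so once
 * the graph commits the engine rolls forward and pushes every write to
 * completion.
 */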
67
68
69 /******************************************************************************
70 *
71 * The following wrappers map the standard DAG creation interface to the
72 * DAG creation routines. Additionally, these wrappers enable experimentation
73 * with new DAG structures by providing an extra level of indirection, allowing
74 * the DAG creation routines to be replaced at this single point.
75 */
76
77
78 void
79 rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
80 RF_DagHeader_t *dag_h, void *bp,
81 RF_RaidAccessFlags_t flags,
82 RF_AllocListElem_t *allocList,
83 RF_IoType_t type)
84 {
85 rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
86 RF_IO_TYPE_WRITE);
87 }
88
89 void
90 rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
91 RF_DagHeader_t *dag_h, void *bp,
92 RF_RaidAccessFlags_t flags,
93 RF_AllocListElem_t *allocList,
94 RF_IoType_t type)
95 {
96 rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
97 RF_IO_TYPE_WRITE);
98 }
99
100 void
101 rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
102 RF_DagHeader_t *dag_h, void *bp,
103 RF_RaidAccessFlags_t flags,
104 RF_AllocListElem_t *allocList)
105 {
106 /* "normal" rollaway */
107 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
108 allocList, &rf_xorFuncs, NULL);
109 }
110
111 void
112 rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
113 RF_DagHeader_t *dag_h, void *bp,
114 RF_RaidAccessFlags_t flags,
115 RF_AllocListElem_t *allocList)
116 {
117 /* "normal" rollaway */
118 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
119 allocList, 1, rf_RegularXorFunc, RF_TRUE);
120 }
121
122
123 /******************************************************************************
124 *
125 * DAG creation code begins here
126 */
127
128
129 /******************************************************************************
130 *
131 * creates a DAG to perform a large-write operation:
132 *
133 * / Rod \ / Wnd \
134 * H -- block- Rod - Xor - Cmt - Wnd --- T
135 * \ Rod / \ Wnp /
136 * \[Wnq]/
137 *
138 * The XOR node also does the Q calculation in the P+Q architecture.
139  * All nodes before the commit node (Cmt) are assumed to be atomic and
140  * undoable, or to make no changes to permanent state.
141 *
142 * Rod = read old data
143 * Cmt = commit node
144 * Wnp = write new parity
145 * Wnd = write new data
146 * Wnq = write new "q"
147 * [] denotes optional segments in the graph
148 *
149 * Parameters: raidPtr - description of the physical array
150 * asmap - logical & physical addresses for this access
151 * bp - buffer ptr (holds write data)
152 * flags - general flags (e.g. disk locking)
153 * allocList - list of memory allocated in DAG creation
154 * nfaults - number of faults array can tolerate
155 * (equal to # redundancy units in stripe)
156  *             redFunc   - redundancy generating function
157 *
158 *****************************************************************************/
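/*
 * Worked example (illustrative): in a 5-disk RAID 5 array with four data
 * units per stripe, a write that supplies new data for three of the four
 * units produces a single Rod node that reads the remaining (untouched)
 * old data unit, three Wnd nodes, and one Wnp node.  The Xor node combines
 * the three new data buffers with the one old data buffer to form the new
 * parity, which Wnp then writes.  With nfaults == 2 a Wnq node is added
 * and the redundancy function also produces the new Q unit.
 */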
159
160 void
161 rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
162 RF_DagHeader_t *dag_h, void *bp,
163 RF_RaidAccessFlags_t flags,
164 RF_AllocListElem_t *allocList,
165 int nfaults, int (*redFunc) (RF_DagNode_t *),
166 int allowBufferRecycle)
167 {
168 RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode;
169 RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
170 int nWndNodes, nRodNodes, i, nodeNum, asmNum;
171 RF_AccessStripeMapHeader_t *new_asm_h[2];
172 RF_StripeNum_t parityStripeID;
173 char *sosBuffer, *eosBuffer;
174 RF_ReconUnitNum_t which_ru;
175 RF_RaidLayout_t *layoutPtr;
176 RF_PhysDiskAddr_t *pda;
177
178 layoutPtr = &(raidPtr->Layout);
179 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
180 asmap->raidAddress,
181 &which_ru);
182
183 #if RF_DEBUG_DAG
184 if (rf_dagDebug) {
185 printf("[Creating large-write DAG]\n");
186 }
187 #endif
188 dag_h->creator = "LargeWriteDAG";
189
190 dag_h->numCommitNodes = 1;
191 dag_h->numCommits = 0;
192 dag_h->numSuccedents = 1;
193
194 /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */
195 nWndNodes = asmap->numStripeUnitsAccessed;
196 RF_MallocAndAdd(nodes,
197 (nWndNodes + 4 + nfaults) * sizeof(RF_DagNode_t),
198 (RF_DagNode_t *), allocList);
199 i = 0;
200 wndNodes = &nodes[i];
201 i += nWndNodes;
202 xorNode = &nodes[i];
203 i += 1;
204 wnpNode = &nodes[i];
205 i += 1;
206 blockNode = &nodes[i];
207 i += 1;
208 commitNode = &nodes[i];
209 i += 1;
210 termNode = &nodes[i];
211 i += 1;
212 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
213 if (nfaults == 2) {
214 wnqNode = &nodes[i];
215 i += 1;
216 } else {
217 #endif
218 wnqNode = NULL;
219 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
220 }
221 #endif
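	/*
	 * nodes[] layout at this point: wndNodes[0..nWndNodes-1], xorNode,
	 * wnpNode, blockNode, commitNode, termNode, and, when nfaults == 2,
	 * wnqNode.  The Rod nodes are allocated separately below once
	 * nRodNodes is known.
	 */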
222 rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
223 new_asm_h, &nRodNodes, &sosBuffer,
224 &eosBuffer, allocList);
225 if (nRodNodes > 0) {
226 RF_MallocAndAdd(rodNodes, nRodNodes * sizeof(RF_DagNode_t),
227 (RF_DagNode_t *), allocList);
228 } else {
229 rodNodes = NULL;
230 }
231
232 /* begin node initialization */
233 if (nRodNodes > 0) {
234 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
235 rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
236 dag_h, "Nil", allocList);
237 } else {
238 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
239 rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
240 dag_h, "Nil", allocList);
241 }
242
243 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
244 rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
245 dag_h, "Cmt", allocList);
246 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
247 rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
248 dag_h, "Trm", allocList);
249
250 /* initialize the Rod nodes */
251 for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
252 if (new_asm_h[asmNum]) {
253 pda = new_asm_h[asmNum]->stripeMap->physInfo;
254 while (pda) {
255 rf_InitNode(&rodNodes[nodeNum], rf_wait,
256 RF_FALSE, rf_DiskReadFunc,
257 rf_DiskReadUndoFunc,
258 rf_GenericWakeupFunc,
259 1, 1, 4, 0, dag_h,
260 "Rod", allocList);
261 rodNodes[nodeNum].params[0].p = pda;
262 rodNodes[nodeNum].params[1].p = pda->bufPtr;
263 rodNodes[nodeNum].params[2].v = parityStripeID;
264 rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
265 which_ru);
266 nodeNum++;
267 pda = pda->next;
268 }
269 }
270 }
271 RF_ASSERT(nodeNum == nRodNodes);
272
273 /* initialize the wnd nodes */
274 pda = asmap->physInfo;
275 for (i = 0; i < nWndNodes; i++) {
276 rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE,
277 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
278 rf_GenericWakeupFunc, 1, 1, 4, 0,
279 dag_h, "Wnd", allocList);
280 RF_ASSERT(pda != NULL);
281 wndNodes[i].params[0].p = pda;
282 wndNodes[i].params[1].p = pda->bufPtr;
283 wndNodes[i].params[2].v = parityStripeID;
284 wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
285 pda = pda->next;
286 }
287
288 /* initialize the redundancy node */
289 if (nRodNodes > 0) {
290 rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
291 rf_NullNodeUndoFunc, NULL, 1,
292 nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
293 nfaults, dag_h, "Xr ", allocList);
294 } else {
295 rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
296 rf_NullNodeUndoFunc, NULL, 1,
297 1, 2 * (nWndNodes + nRodNodes) + 1,
298 nfaults, dag_h, "Xr ", allocList);
299 }
300 xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
301 for (i = 0; i < nWndNodes; i++) {
302 /* pda */
303 xorNode->params[2 * i + 0] = wndNodes[i].params[0];
304 /* buf ptr */
305 xorNode->params[2 * i + 1] = wndNodes[i].params[1];
306 }
307 for (i = 0; i < nRodNodes; i++) {
308 /* pda */
309 xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0];
310 /* buf ptr */
311 xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1];
312 }
313 /* xor node needs to get at RAID information */
314 xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
315
316 /*
317 * Look for an Rod node that reads a complete SU. If none,
318 * alloc a buffer to receive the parity info. Note that we
319 * can't use a new data buffer because it will not have gotten
320 * written when the xor occurs. */
321 if (allowBufferRecycle) {
322 for (i = 0; i < nRodNodes; i++) {
323 if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
324 break;
325 }
326 }
327 if ((!allowBufferRecycle) || (i == nRodNodes)) {
328 RF_MallocAndAdd(xorNode->results[0],
329 rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
330 (void *), allocList);
331 } else {
332 xorNode->results[0] = rodNodes[i].params[1].p;
333 }
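	/*
	 * Either way, xorNode->results[0] now names the full-SU buffer that
	 * will receive the newly computed parity; the Wnp node below takes it
	 * as its data buffer and writes it to the parity unit.
	 */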
334
335 /* initialize the Wnp node */
336 rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
337 rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
338 dag_h, "Wnp", allocList);
339 wnpNode->params[0].p = asmap->parityInfo;
340 wnpNode->params[1].p = xorNode->results[0];
341 wnpNode->params[2].v = parityStripeID;
342 wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
343 /* parityInfo must describe entire parity unit */
344 RF_ASSERT(asmap->parityInfo->next == NULL);
345
346 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
347 if (nfaults == 2) {
348 /*
349 		 * We never try to recycle a buffer for the Q calculation
350 * in addition to the parity. This would cause two buffers
351 * to get smashed during the P and Q calculation, guaranteeing
352 * one would be wrong.
353 */
354 RF_MallocAndAdd(xorNode->results[1],
355 rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
356 (void *), allocList);
357 rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
358 rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
359 1, 1, 4, 0, dag_h, "Wnq", allocList);
360 wnqNode->params[0].p = asmap->qInfo;
361 wnqNode->params[1].p = xorNode->results[1];
362 wnqNode->params[2].v = parityStripeID;
363 wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
364 		/* qInfo must describe entire q unit */
365 		RF_ASSERT(asmap->qInfo->next == NULL);
366 }
367 #endif
368 /*
369 * Connect nodes to form graph.
370 */
371
372 /* connect dag header to block node */
373 RF_ASSERT(blockNode->numAntecedents == 0);
374 dag_h->succedents[0] = blockNode;
375
376 if (nRodNodes > 0) {
377 /* connect the block node to the Rod nodes */
378 RF_ASSERT(blockNode->numSuccedents == nRodNodes);
379 RF_ASSERT(xorNode->numAntecedents == nRodNodes);
380 for (i = 0; i < nRodNodes; i++) {
381 RF_ASSERT(rodNodes[i].numAntecedents == 1);
382 blockNode->succedents[i] = &rodNodes[i];
383 rodNodes[i].antecedents[0] = blockNode;
384 rodNodes[i].antType[0] = rf_control;
385
386 /* connect the Rod nodes to the Xor node */
387 RF_ASSERT(rodNodes[i].numSuccedents == 1);
388 rodNodes[i].succedents[0] = xorNode;
389 xorNode->antecedents[i] = &rodNodes[i];
390 xorNode->antType[i] = rf_trueData;
391 }
392 } else {
393 /* connect the block node to the Xor node */
394 RF_ASSERT(blockNode->numSuccedents == 1);
395 RF_ASSERT(xorNode->numAntecedents == 1);
396 blockNode->succedents[0] = xorNode;
397 xorNode->antecedents[0] = blockNode;
398 xorNode->antType[0] = rf_control;
399 }
400
401 /* connect the xor node to the commit node */
402 RF_ASSERT(xorNode->numSuccedents == 1);
403 RF_ASSERT(commitNode->numAntecedents == 1);
404 xorNode->succedents[0] = commitNode;
405 commitNode->antecedents[0] = xorNode;
406 commitNode->antType[0] = rf_control;
407
408 /* connect the commit node to the write nodes */
409 RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
410 for (i = 0; i < nWndNodes; i++) {
411 		RF_ASSERT(wndNodes[i].numAntecedents == 1);
412 commitNode->succedents[i] = &wndNodes[i];
413 wndNodes[i].antecedents[0] = commitNode;
414 wndNodes[i].antType[0] = rf_control;
415 }
416 RF_ASSERT(wnpNode->numAntecedents == 1);
417 commitNode->succedents[nWndNodes] = wnpNode;
418 wnpNode->antecedents[0] = commitNode;
419 wnpNode->antType[0] = rf_trueData;
420 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
421 if (nfaults == 2) {
422 RF_ASSERT(wnqNode->numAntecedents == 1);
423 commitNode->succedents[nWndNodes + 1] = wnqNode;
424 wnqNode->antecedents[0] = commitNode;
425 wnqNode->antType[0] = rf_trueData;
426 }
427 #endif
428 /* connect the write nodes to the term node */
429 RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
430 RF_ASSERT(termNode->numSuccedents == 0);
431 for (i = 0; i < nWndNodes; i++) {
432 		RF_ASSERT(wndNodes[i].numSuccedents == 1);
433 wndNodes[i].succedents[0] = termNode;
434 termNode->antecedents[i] = &wndNodes[i];
435 termNode->antType[i] = rf_control;
436 }
437 RF_ASSERT(wnpNode->numSuccedents == 1);
438 wnpNode->succedents[0] = termNode;
439 termNode->antecedents[nWndNodes] = wnpNode;
440 termNode->antType[nWndNodes] = rf_control;
441 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
442 if (nfaults == 2) {
443 RF_ASSERT(wnqNode->numSuccedents == 1);
444 wnqNode->succedents[0] = termNode;
445 termNode->antecedents[nWndNodes + 1] = wnqNode;
446 termNode->antType[nWndNodes + 1] = rf_control;
447 }
448 #endif
449 }
450 /******************************************************************************
451 *
452 * creates a DAG to perform a small-write operation (either raid 5 or pq),
453 * which is as follows:
454 *
455 * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
456 * \- Rod X / \----> Wnd [Und]-/
457 * [\- Rod X / \---> Wnd [Und]-/]
458 * [\- Roq -> Q / \--> Wnq [Unq]-/]
459 *
460 * Rop = read old parity
461 * Rod = read old data
462 * Roq = read old "q"
463 * Cmt = commit node
464 * Und = unlock data disk
465 * Unp = unlock parity disk
466 * Unq = unlock q disk
467 * Wnp = write new parity
468 * Wnd = write new data
469 * Wnq = write new "q"
470 * [ ] denotes optional segments in the graph
471 *
472 * Parameters: raidPtr - description of the physical array
473 * asmap - logical & physical addresses for this access
474 * bp - buffer ptr (holds write data)
475 * flags - general flags (e.g. disk locking)
476 * allocList - list of memory allocated in DAG creation
477 * pfuncs - list of parity generating functions
478 * qfuncs - list of q generating functions
479 *
480  * A null qfuncs indicates a single-fault-tolerant array.
481 *****************************************************************************/
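/*
 * Worked example (illustrative): for a RAID 5 small write touching a
 * single data unit, the graph reads the old data (Rod) and old parity
 * (Rop), computes new parity = old parity XOR old data XOR new data in
 * the Xor node, and then writes the new data (Wnd) and new parity (Wnp).
 * When the access maps to two parity pda's, the "double-xor" case below
 * builds one Xor node per parity pda.
 */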
482
483 void
484 rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
485 RF_DagHeader_t *dag_h, void *bp,
486 RF_RaidAccessFlags_t flags,
487 RF_AllocListElem_t *allocList,
488 const RF_RedFuncs_t *pfuncs,
489 const RF_RedFuncs_t *qfuncs)
490 {
491 RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
492 RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode, *nodes;
493 RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
494 int i, j, nNodes, totalNumNodes;
495 RF_ReconUnitNum_t which_ru;
496 int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
497 int (*qfunc) (RF_DagNode_t *);
498 int numDataNodes, numParityNodes;
499 RF_StripeNum_t parityStripeID;
500 RF_PhysDiskAddr_t *pda;
501 char *name, *qname;
502 long nfaults;
503
504 nfaults = qfuncs ? 2 : 1;
505
506 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
507 asmap->raidAddress, &which_ru);
508 pda = asmap->physInfo;
509 numDataNodes = asmap->numStripeUnitsAccessed;
510 numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
511
512 #if RF_DEBUG_DAG
513 if (rf_dagDebug) {
514 printf("[Creating small-write DAG]\n");
515 }
516 #endif
517 RF_ASSERT(numDataNodes > 0);
518 dag_h->creator = "SmallWriteDAG";
519
520 dag_h->numCommitNodes = 1;
521 dag_h->numCommits = 0;
522 dag_h->numSuccedents = 1;
523
524 /*
525 * DAG creation occurs in four steps:
526 * 1. count the number of nodes in the DAG
527 * 2. create the nodes
528 * 3. initialize the nodes
529 * 4. connect the nodes
530 */
531
532 /*
533 * Step 1. compute number of nodes in the graph
534 */
535
536 	/* number of nodes: a read and write for each data unit; a
537 	 * redundancy computation node for each parity unit (nfaults *
538 	 * numParityNodes); a read and write for each parity unit and,
539 	 * when nfaults == 2, for each Q unit; plus a block, a commit,
540 	 * and a terminate node (3) */
541 totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
542 + (nfaults * 2 * numParityNodes) + 3;
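	/*
	 * For example, a RAID 5 (nfaults == 1) write touching one data unit
	 * with a single parity pda gives numDataNodes == 1 and
	 * numParityNodes == 1, so totalNumNodes == 2 + 1 + 2 + 3 == 8:
	 * Nil, Cmt, Rod, Rop, Wnd, Wnp, one Xor, and Trm.
	 */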
543 /*
544 * Step 2. create the nodes
545 */
546 RF_MallocAndAdd(nodes, totalNumNodes * sizeof(RF_DagNode_t),
547 (RF_DagNode_t *), allocList);
548 i = 0;
549 blockNode = &nodes[i];
550 i += 1;
551 commitNode = &nodes[i];
552 i += 1;
553 readDataNodes = &nodes[i];
554 i += numDataNodes;
555 readParityNodes = &nodes[i];
556 i += numParityNodes;
557 writeDataNodes = &nodes[i];
558 i += numDataNodes;
559 writeParityNodes = &nodes[i];
560 i += numParityNodes;
561 xorNodes = &nodes[i];
562 i += numParityNodes;
563 termNode = &nodes[i];
564 i += 1;
565
566 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
567 if (nfaults == 2) {
568 readQNodes = &nodes[i];
569 i += numParityNodes;
570 writeQNodes = &nodes[i];
571 i += numParityNodes;
572 qNodes = &nodes[i];
573 i += numParityNodes;
574 } else {
575 #endif
576 readQNodes = writeQNodes = qNodes = NULL;
577 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
578 }
579 #endif
580 RF_ASSERT(i == totalNumNodes);
581
582 /*
583 * Step 3. initialize the nodes
584 */
585 /* initialize block node (Nil) */
586 nNodes = numDataNodes + (nfaults * numParityNodes);
587 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
588 rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
589 dag_h, "Nil", allocList);
590
591 /* initialize commit node (Cmt) */
592 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
593 rf_NullNodeUndoFunc, NULL, nNodes,
594 (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
595
596 /* initialize terminate node (Trm) */
597 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
598 rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
599 dag_h, "Trm", allocList);
600
601 /* initialize nodes which read old data (Rod) */
602 for (i = 0; i < numDataNodes; i++) {
603 rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE,
604 rf_DiskReadFunc, rf_DiskReadUndoFunc,
605 rf_GenericWakeupFunc, (nfaults * numParityNodes),
606 1, 4, 0, dag_h, "Rod", allocList);
607 RF_ASSERT(pda != NULL);
608 /* physical disk addr desc */
609 readDataNodes[i].params[0].p = pda;
610 /* buffer to hold old data */
611 readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
612 readDataNodes[i].params[2].v = parityStripeID;
613 readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
614 which_ru);
615 pda = pda->next;
616 for (j = 0; j < readDataNodes[i].numSuccedents; j++) {
617 readDataNodes[i].propList[j] = NULL;
618 }
619 }
620
621 /* initialize nodes which read old parity (Rop) */
622 pda = asmap->parityInfo;
623 i = 0;
624 for (i = 0; i < numParityNodes; i++) {
625 RF_ASSERT(pda != NULL);
626 rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE,
627 rf_DiskReadFunc, rf_DiskReadUndoFunc,
628 rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
629 dag_h, "Rop", allocList);
630 readParityNodes[i].params[0].p = pda;
631 /* buffer to hold old parity */
632 readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
633 readParityNodes[i].params[2].v = parityStripeID;
634 readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
635 which_ru);
636 pda = pda->next;
637 for (j = 0; j < readParityNodes[i].numSuccedents; j++) {
638 			readParityNodes[i].propList[j] = NULL;
639 }
640 }
641
642 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
643 /* initialize nodes which read old Q (Roq) */
644 if (nfaults == 2) {
645 pda = asmap->qInfo;
646 for (i = 0; i < numParityNodes; i++) {
647 RF_ASSERT(pda != NULL);
648 rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE,
649 rf_DiskReadFunc, rf_DiskReadUndoFunc,
650 rf_GenericWakeupFunc, numParityNodes,
651 1, 4, 0, dag_h, "Roq", allocList);
652 readQNodes[i].params[0].p = pda;
653 /* buffer to hold old Q */
654 readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
655 readQNodes[i].params[2].v = parityStripeID;
656 readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
657 which_ru);
658 pda = pda->next;
659 for (j = 0; j < readQNodes[i].numSuccedents; j++) {
660 				readQNodes[i].propList[j] = NULL;
661 }
662 }
663 }
664 #endif
665 /* initialize nodes which write new data (Wnd) */
666 pda = asmap->physInfo;
667 for (i = 0; i < numDataNodes; i++) {
668 RF_ASSERT(pda != NULL);
669 rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE,
670 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
671 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
672 "Wnd", allocList);
673 /* physical disk addr desc */
674 writeDataNodes[i].params[0].p = pda;
675 /* buffer holding new data to be written */
676 writeDataNodes[i].params[1].p = pda->bufPtr;
677 writeDataNodes[i].params[2].v = parityStripeID;
678 writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
679 which_ru);
680 pda = pda->next;
681 }
682
683 /*
684 * Initialize nodes which compute new parity and Q.
685 */
686 /*
687 * We use the simple XOR func in the double-XOR case, and when
688 * we're accessing only a portion of one stripe unit. The
689 * distinction between the two is that the regular XOR func
690 * assumes that the targbuf is a full SU in size, and examines
691 * the pda associated with the buffer to decide where within
692 * the buffer to XOR the data, whereas the simple XOR func
693 * just XORs the data into the start of the buffer. */
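	/*
	 * For instance, a write that covers only part of one stripe unit
	 * (numDataNodes == 1 and totalSectorsAccessed < sectorsPerStripeUnit)
	 * takes the simple path, since the old-parity target buffer covers
	 * only the accessed sectors; an access with a single parity pda that
	 * covers a full stripe unit uses the regular func, which offsets into
	 * a full-SU target buffer according to each source pda.
	 */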
694 if ((numParityNodes == 2) || ((numDataNodes == 1)
695 && (asmap->totalSectorsAccessed <
696 raidPtr->Layout.sectorsPerStripeUnit))) {
697 func = pfuncs->simple;
698 undoFunc = rf_NullNodeUndoFunc;
699 name = pfuncs->SimpleName;
700 if (qfuncs) {
701 qfunc = qfuncs->simple;
702 qname = qfuncs->SimpleName;
703 } else {
704 qfunc = NULL;
705 qname = NULL;
706 }
707 } else {
708 func = pfuncs->regular;
709 undoFunc = rf_NullNodeUndoFunc;
710 name = pfuncs->RegularName;
711 if (qfuncs) {
712 qfunc = qfuncs->regular;
713 qname = qfuncs->RegularName;
714 } else {
715 qfunc = NULL;
716 qname = NULL;
717 }
718 }
719 /*
720 * Initialize the xor nodes: params are {pda,buf}
721 * from {Rod,Wnd,Rop} nodes, and raidPtr
722 */
723 if (numParityNodes == 2) {
724 /* double-xor case */
725 for (i = 0; i < numParityNodes; i++) {
726 /* note: no wakeup func for xor */
727 rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func,
728 undoFunc, NULL, 1,
729 (numDataNodes + numParityNodes),
730 7, 1, dag_h, name, allocList);
731 xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD;
732 xorNodes[i].params[0] = readDataNodes[i].params[0];
733 xorNodes[i].params[1] = readDataNodes[i].params[1];
734 xorNodes[i].params[2] = readParityNodes[i].params[0];
735 xorNodes[i].params[3] = readParityNodes[i].params[1];
736 xorNodes[i].params[4] = writeDataNodes[i].params[0];
737 xorNodes[i].params[5] = writeDataNodes[i].params[1];
738 xorNodes[i].params[6].p = raidPtr;
739 /* use old parity buf as target buf */
740 xorNodes[i].results[0] = readParityNodes[i].params[1].p;
741 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
742 if (nfaults == 2) {
743 /* note: no wakeup func for qor */
744 rf_InitNode(&qNodes[i], rf_wait, RF_FALSE,
745 qfunc, undoFunc, NULL, 1,
746 (numDataNodes + numParityNodes),
747 7, 1, dag_h, qname, allocList);
748 qNodes[i].params[0] = readDataNodes[i].params[0];
749 qNodes[i].params[1] = readDataNodes[i].params[1];
750 qNodes[i].params[2] = readQNodes[i].params[0];
751 qNodes[i].params[3] = readQNodes[i].params[1];
752 qNodes[i].params[4] = writeDataNodes[i].params[0];
753 qNodes[i].params[5] = writeDataNodes[i].params[1];
754 qNodes[i].params[6].p = raidPtr;
755 /* use old Q buf as target buf */
756 qNodes[i].results[0] = readQNodes[i].params[1].p;
757 }
758 #endif
759 }
760 } else {
761 /* there is only one xor node in this case */
762 rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func,
763 undoFunc, NULL, 1, (numDataNodes + numParityNodes),
764 (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
765 dag_h, name, allocList);
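		/*
		 * The parameter count above, 2 * (numDataNodes + numDataNodes
		 * + 1) + 1, is one (pda, bufPtr) pair for each Rod node, one
		 * pair for the single Rop node, one pair for each Wnd node,
		 * and a final pointer to raidPtr.  The loop below runs to
		 * numDataNodes + 1 because readParityNodes[0] immediately
		 * follows readDataNodes[] in the nodes[] array.
		 */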
766 xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD;
767 for (i = 0; i < numDataNodes + 1; i++) {
768 /* set up params related to Rod and Rop nodes */
769 xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */
770 xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */
771 }
772 for (i = 0; i < numDataNodes; i++) {
773 /* set up params related to Wnd and Wnp nodes */
774 xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
775 writeDataNodes[i].params[0];
776 xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
777 writeDataNodes[i].params[1];
778 }
779 /* xor node needs to get at RAID information */
780 xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
781 xorNodes[0].results[0] = readParityNodes[0].params[1].p;
782 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
783 if (nfaults == 2) {
784 rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc,
785 undoFunc, NULL, 1,
786 (numDataNodes + numParityNodes),
787 (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
788 dag_h, qname, allocList);
789 for (i = 0; i < numDataNodes; i++) {
790 /* set up params related to Rod */
791 qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */
792 qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */
793 }
794 /* and read old q */
795 qNodes[0].params[2 * numDataNodes + 0] = /* pda */
796 readQNodes[0].params[0];
797 qNodes[0].params[2 * numDataNodes + 1] = /* buffer ptr */
798 readQNodes[0].params[1];
799 for (i = 0; i < numDataNodes; i++) {
800 /* set up params related to Wnd nodes */
801 qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
802 writeDataNodes[i].params[0];
803 qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
804 writeDataNodes[i].params[1];
805 }
806 /* xor node needs to get at RAID information */
807 qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
808 qNodes[0].results[0] = readQNodes[0].params[1].p;
809 }
810 #endif
811 }
812
813 /* initialize nodes which write new parity (Wnp) */
814 pda = asmap->parityInfo;
815 for (i = 0; i < numParityNodes; i++) {
816 rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE,
817 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
818 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
819 "Wnp", allocList);
820 RF_ASSERT(pda != NULL);
821 		writeParityNodes[i].params[0].p = pda;	/* physical disk
822 							 * addr desc */
823 		writeParityNodes[i].params[1].p = xorNodes[i].results[0];	/* buffer pointer for
824 										 * parity write
825 										 * operation */
826 writeParityNodes[i].params[2].v = parityStripeID;
827 writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
828 which_ru);
829 pda = pda->next;
830 }
831
832 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
833 /* initialize nodes which write new Q (Wnq) */
834 if (nfaults == 2) {
835 pda = asmap->qInfo;
836 for (i = 0; i < numParityNodes; i++) {
837 rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE,
838 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
839 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
840 "Wnq", allocList);
841 RF_ASSERT(pda != NULL);
842 			writeQNodes[i].params[0].p = pda;	/* physical disk
843 								 * addr desc */
844 			writeQNodes[i].params[1].p = qNodes[i].results[0];	/* buffer pointer for
845 										 * Q write
846 										 * operation */
847 writeQNodes[i].params[2].v = parityStripeID;
848 writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
849 which_ru);
850 pda = pda->next;
851 }
852 }
853 #endif
854 /*
855 * Step 4. connect the nodes.
856 */
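	/*
	 * The resulting ordering is: Nil precedes all reads; the reads feed
	 * the Xor/Q nodes; every Xor/Q node precedes Cmt; Cmt precedes every
	 * write; and every write precedes Trm, matching the graph sketched
	 * in the header comment above.
	 */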
857
858 /* connect header to block node */
859 dag_h->succedents[0] = blockNode;
860
861 /* connect block node to read old data nodes */
862 RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
863 for (i = 0; i < numDataNodes; i++) {
864 blockNode->succedents[i] = &readDataNodes[i];
865 RF_ASSERT(readDataNodes[i].numAntecedents == 1);
866 readDataNodes[i].antecedents[0] = blockNode;
867 readDataNodes[i].antType[0] = rf_control;
868 }
869
870 /* connect block node to read old parity nodes */
871 for (i = 0; i < numParityNodes; i++) {
872 blockNode->succedents[numDataNodes + i] = &readParityNodes[i];
873 RF_ASSERT(readParityNodes[i].numAntecedents == 1);
874 readParityNodes[i].antecedents[0] = blockNode;
875 readParityNodes[i].antType[0] = rf_control;
876 }
877
878 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
879 /* connect block node to read old Q nodes */
880 if (nfaults == 2) {
881 for (i = 0; i < numParityNodes; i++) {
882 blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i];
883 RF_ASSERT(readQNodes[i].numAntecedents == 1);
884 readQNodes[i].antecedents[0] = blockNode;
885 readQNodes[i].antType[0] = rf_control;
886 }
887 }
888 #endif
889 /* connect read old data nodes to xor nodes */
890 for (i = 0; i < numDataNodes; i++) {
891 RF_ASSERT(readDataNodes[i].numSuccedents == (nfaults * numParityNodes));
892 for (j = 0; j < numParityNodes; j++) {
893 RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes);
894 readDataNodes[i].succedents[j] = &xorNodes[j];
895 xorNodes[j].antecedents[i] = &readDataNodes[i];
896 xorNodes[j].antType[i] = rf_trueData;
897 }
898 }
899
900 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
901 /* connect read old data nodes to q nodes */
902 if (nfaults == 2) {
903 for (i = 0; i < numDataNodes; i++) {
904 for (j = 0; j < numParityNodes; j++) {
905 RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes);
906 readDataNodes[i].succedents[numParityNodes + j] = &qNodes[j];
907 qNodes[j].antecedents[i] = &readDataNodes[i];
908 qNodes[j].antType[i] = rf_trueData;
909 }
910 }
911 }
912 #endif
913 /* connect read old parity nodes to xor nodes */
914 for (i = 0; i < numParityNodes; i++) {
915 RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes);
916 for (j = 0; j < numParityNodes; j++) {
917 readParityNodes[i].succedents[j] = &xorNodes[j];
918 xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
919 xorNodes[j].antType[numDataNodes + i] = rf_trueData;
920 }
921 }
922
923 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
924 /* connect read old q nodes to q nodes */
925 if (nfaults == 2) {
926 for (i = 0; i < numParityNodes; i++) {
927 			RF_ASSERT(readQNodes[i].numSuccedents == numParityNodes);
928 for (j = 0; j < numParityNodes; j++) {
929 readQNodes[i].succedents[j] = &qNodes[j];
930 qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i];
931 qNodes[j].antType[numDataNodes + i] = rf_trueData;
932 }
933 }
934 }
935 #endif
936 /* connect xor nodes to commit node */
937 RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
938 for (i = 0; i < numParityNodes; i++) {
939 RF_ASSERT(xorNodes[i].numSuccedents == 1);
940 xorNodes[i].succedents[0] = commitNode;
941 commitNode->antecedents[i] = &xorNodes[i];
942 commitNode->antType[i] = rf_control;
943 }
944
945 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
946 /* connect q nodes to commit node */
947 if (nfaults == 2) {
948 for (i = 0; i < numParityNodes; i++) {
949 RF_ASSERT(qNodes[i].numSuccedents == 1);
950 qNodes[i].succedents[0] = commitNode;
951 commitNode->antecedents[i + numParityNodes] = &qNodes[i];
952 commitNode->antType[i + numParityNodes] = rf_control;
953 }
954 }
955 #endif
956 /* connect commit node to write nodes */
957 RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
958 for (i = 0; i < numDataNodes; i++) {
959 RF_ASSERT(writeDataNodes[i].numAntecedents == 1);
960 commitNode->succedents[i] = &writeDataNodes[i];
961 writeDataNodes[i].antecedents[0] = commitNode;
962 writeDataNodes[i].antType[0] = rf_trueData;
963 }
964 for (i = 0; i < numParityNodes; i++) {
965 RF_ASSERT(writeParityNodes[i].numAntecedents == 1);
966 commitNode->succedents[i + numDataNodes] = &writeParityNodes[i];
967 writeParityNodes[i].antecedents[0] = commitNode;
968 writeParityNodes[i].antType[0] = rf_trueData;
969 }
970 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
971 if (nfaults == 2) {
972 for (i = 0; i < numParityNodes; i++) {
973 RF_ASSERT(writeQNodes[i].numAntecedents == 1);
974 commitNode->succedents[i + numDataNodes + numParityNodes] = &writeQNodes[i];
975 writeQNodes[i].antecedents[0] = commitNode;
976 writeQNodes[i].antType[0] = rf_trueData;
977 }
978 }
979 #endif
980 RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
981 RF_ASSERT(termNode->numSuccedents == 0);
982 for (i = 0; i < numDataNodes; i++) {
983 /* connect write new data nodes to term node */
984 RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
985 RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
986 writeDataNodes[i].succedents[0] = termNode;
987 termNode->antecedents[i] = &writeDataNodes[i];
988 termNode->antType[i] = rf_control;
989 }
990
991 for (i = 0; i < numParityNodes; i++) {
992 RF_ASSERT(writeParityNodes[i].numSuccedents == 1);
993 writeParityNodes[i].succedents[0] = termNode;
994 termNode->antecedents[numDataNodes + i] = &writeParityNodes[i];
995 termNode->antType[numDataNodes + i] = rf_control;
996 }
997
998 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
999 if (nfaults == 2) {
1000 for (i = 0; i < numParityNodes; i++) {
1001 RF_ASSERT(writeQNodes[i].numSuccedents == 1);
1002 writeQNodes[i].succedents[0] = termNode;
1003 termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i];
1004 termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1005 }
1006 }
1007 #endif
1008 }
1009
1010
1011 /******************************************************************************
1012 * create a write graph (fault-free or degraded) for RAID level 1
1013 *
1014 * Hdr -> Commit -> Wpd -> Nil -> Trm
1015 * -> Wsd ->
1016 *
1017 * The "Wpd" node writes data to the primary copy in the mirror pair
1018 * The "Wsd" node writes data to the secondary copy in the mirror pair
1019 *
1020 * Parameters: raidPtr - description of the physical array
1021 * asmap - logical & physical addresses for this access
1022 * bp - buffer ptr (holds write data)
1023 * flags - general flags (e.g. disk locking)
1024 * allocList - list of memory allocated in DAG creation
1025 *****************************************************************************/
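/*
 * For example, an access that is not stripe-unit aligned maps to two pda's
 * on each side of the mirror, giving two Wpd and two Wsd nodes; a
 * stripe-unit-aligned access needs only one of each.  In a degraded array
 * the node count for the failed half is reduced (see the numDataFailed /
 * numParityFailed adjustments below), so only the surviving copy is
 * written.
 */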
1026
1027 void
1028 rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1029 RF_DagHeader_t *dag_h, void *bp,
1030 RF_RaidAccessFlags_t flags,
1031 RF_AllocListElem_t *allocList)
1032 {
1033 RF_DagNode_t *unblockNode, *termNode, *commitNode;
1034 RF_DagNode_t *nodes, *wndNode, *wmirNode;
1035 int nWndNodes, nWmirNodes, i;
1036 RF_ReconUnitNum_t which_ru;
1037 RF_PhysDiskAddr_t *pda, *pdaP;
1038 RF_StripeNum_t parityStripeID;
1039
1040 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1041 asmap->raidAddress, &which_ru);
1042 #if RF_DEBUG_DAG
1043 if (rf_dagDebug) {
1044 printf("[Creating RAID level 1 write DAG]\n");
1045 }
1046 #endif
1047 dag_h->creator = "RaidOneWriteDAG";
1048
1049 /* 2 implies access not SU aligned */
1050 nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1051 nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1052
1053 /* alloc the Wnd nodes and the Wmir node */
1054 if (asmap->numDataFailed == 1)
1055 nWndNodes--;
1056 if (asmap->numParityFailed == 1)
1057 nWmirNodes--;
1058
1059 /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1060 * + terminator) */
1061 RF_MallocAndAdd(nodes,
1062 (nWndNodes + nWmirNodes + 3) * sizeof(RF_DagNode_t),
1063 (RF_DagNode_t *), allocList);
1064 i = 0;
1065 wndNode = &nodes[i];
1066 i += nWndNodes;
1067 wmirNode = &nodes[i];
1068 i += nWmirNodes;
1069 commitNode = &nodes[i];
1070 i += 1;
1071 unblockNode = &nodes[i];
1072 i += 1;
1073 termNode = &nodes[i];
1074 i += 1;
1075 RF_ASSERT(i == (nWndNodes + nWmirNodes + 3));
1076
1077 /* this dag can commit immediately */
1078 dag_h->numCommitNodes = 1;
1079 dag_h->numCommits = 0;
1080 dag_h->numSuccedents = 1;
1081
1082 /* initialize the commit, unblock, and term nodes */
1083 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1084 rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1085 0, 0, 0, dag_h, "Cmt", allocList);
1086 rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1087 rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1088 0, 0, dag_h, "Nil", allocList);
1089 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1090 rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1091 dag_h, "Trm", allocList);
1092
1093 /* initialize the wnd nodes */
1094 if (nWndNodes > 0) {
1095 pda = asmap->physInfo;
1096 for (i = 0; i < nWndNodes; i++) {
1097 rf_InitNode(&wndNode[i], rf_wait, RF_FALSE,
1098 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1099 rf_GenericWakeupFunc, 1, 1, 4, 0,
1100 dag_h, "Wpd", allocList);
1101 RF_ASSERT(pda != NULL);
1102 wndNode[i].params[0].p = pda;
1103 wndNode[i].params[1].p = pda->bufPtr;
1104 wndNode[i].params[2].v = parityStripeID;
1105 wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1106 pda = pda->next;
1107 }
1108 RF_ASSERT(pda == NULL);
1109 }
1110 /* initialize the mirror nodes */
1111 if (nWmirNodes > 0) {
1112 pda = asmap->physInfo;
1113 pdaP = asmap->parityInfo;
1114 for (i = 0; i < nWmirNodes; i++) {
1115 rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE,
1116 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1117 rf_GenericWakeupFunc, 1, 1, 4, 0,
1118 dag_h, "Wsd", allocList);
1119 RF_ASSERT(pda != NULL);
1120 wmirNode[i].params[0].p = pdaP;
1121 wmirNode[i].params[1].p = pda->bufPtr;
1122 wmirNode[i].params[2].v = parityStripeID;
1123 wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1124 pda = pda->next;
1125 pdaP = pdaP->next;
1126 }
1127 RF_ASSERT(pda == NULL);
1128 RF_ASSERT(pdaP == NULL);
1129 }
1130 /* link the header node to the commit node */
1131 RF_ASSERT(dag_h->numSuccedents == 1);
1132 RF_ASSERT(commitNode->numAntecedents == 0);
1133 dag_h->succedents[0] = commitNode;
1134
1135 /* link the commit node to the write nodes */
1136 RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1137 for (i = 0; i < nWndNodes; i++) {
1138 RF_ASSERT(wndNode[i].numAntecedents == 1);
1139 commitNode->succedents[i] = &wndNode[i];
1140 wndNode[i].antecedents[0] = commitNode;
1141 wndNode[i].antType[0] = rf_control;
1142 }
1143 for (i = 0; i < nWmirNodes; i++) {
1144 RF_ASSERT(wmirNode[i].numAntecedents == 1);
1145 commitNode->succedents[i + nWndNodes] = &wmirNode[i];
1146 wmirNode[i].antecedents[0] = commitNode;
1147 wmirNode[i].antType[0] = rf_control;
1148 }
1149
1150 /* link the write nodes to the unblock node */
1151 RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1152 for (i = 0; i < nWndNodes; i++) {
1153 RF_ASSERT(wndNode[i].numSuccedents == 1);
1154 wndNode[i].succedents[0] = unblockNode;
1155 unblockNode->antecedents[i] = &wndNode[i];
1156 unblockNode->antType[i] = rf_control;
1157 }
1158 for (i = 0; i < nWmirNodes; i++) {
1159 RF_ASSERT(wmirNode[i].numSuccedents == 1);
1160 wmirNode[i].succedents[0] = unblockNode;
1161 unblockNode->antecedents[i + nWndNodes] = &wmirNode[i];
1162 unblockNode->antType[i + nWndNodes] = rf_control;
1163 }
1164
1165 /* link the unblock node to the term node */
1166 RF_ASSERT(unblockNode->numSuccedents == 1);
1167 RF_ASSERT(termNode->numAntecedents == 1);
1168 RF_ASSERT(termNode->numSuccedents == 0);
1169 unblockNode->succedents[0] = termNode;
1170 termNode->antecedents[0] = unblockNode;
1171 termNode->antType[0] = rf_control;
1172 }
1173