rf_dagffwr.c revision 1.27 1 1.27 oster /* $NetBSD: rf_dagffwr.c,v 1.27 2004/04/09 23:10:16 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
/*
 * rf_dagffwr.c
 *
 * code for creating fault-free write DAGs
 *
 */
35 1.7 lukem
36 1.7 lukem #include <sys/cdefs.h>
37 1.27 oster __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.27 2004/04/09 23:10:16 oster Exp $");
38 1.1 oster
39 1.6 oster #include <dev/raidframe/raidframevar.h>
40 1.6 oster
41 1.1 oster #include "rf_raid.h"
42 1.1 oster #include "rf_dag.h"
43 1.1 oster #include "rf_dagutils.h"
44 1.1 oster #include "rf_dagfuncs.h"
45 1.1 oster #include "rf_debugMem.h"
46 1.1 oster #include "rf_dagffrd.h"
47 1.1 oster #include "rf_general.h"
48 1.1 oster #include "rf_dagffwr.h"
49 1.23 oster #include "rf_map.h"
50 1.1 oster
/******************************************************************************
 *
 * General comments on DAG creation:
 *
 * All DAGs in this file use roll-away error recovery.  Each DAG has a single
 * commit node, usually called "Cmt."  If an error occurs before the Cmt node
 * is reached, the execution engine will halt forward execution and work
 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node is undoable and atomic - or -
 * does not make changes to permanent state, the graph will fail atomically.
 * If an error occurs after the Cmt node executes, the engine will roll-forward
 * through the graph, blindly executing nodes until it reaches the end.
 * If a graph reaches the end, it is assumed to have completed successfully.
 *
 * A graph has only 1 Cmt node.
 *
 */
68 1.1 oster
69 1.1 oster
70 1.1 oster /******************************************************************************
71 1.1 oster *
72 1.1 oster * The following wrappers map the standard DAG creation interface to the
73 1.1 oster * DAG creation routines. Additionally, these wrappers enable experimentation
74 1.1 oster * with new DAG structures by providing an extra level of indirection, allowing
75 1.1 oster * the DAG creation routines to be replaced at this single point.
76 1.1 oster */
77 1.1 oster
78 1.1 oster
79 1.3 oster void
80 1.13 oster rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81 1.13 oster RF_DagHeader_t *dag_h, void *bp,
82 1.13 oster RF_RaidAccessFlags_t flags,
83 1.13 oster RF_AllocListElem_t *allocList,
84 1.13 oster RF_IoType_t type)
85 1.1 oster {
86 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87 1.14 oster RF_IO_TYPE_WRITE);
88 1.1 oster }
89 1.1 oster
90 1.3 oster void
91 1.13 oster rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92 1.13 oster RF_DagHeader_t *dag_h, void *bp,
93 1.13 oster RF_RaidAccessFlags_t flags,
94 1.13 oster RF_AllocListElem_t *allocList,
95 1.13 oster RF_IoType_t type)
96 1.1 oster {
97 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98 1.14 oster RF_IO_TYPE_WRITE);
99 1.1 oster }
100 1.1 oster
101 1.3 oster void
102 1.13 oster rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103 1.13 oster RF_DagHeader_t *dag_h, void *bp,
104 1.13 oster RF_RaidAccessFlags_t flags,
105 1.13 oster RF_AllocListElem_t *allocList)
106 1.1 oster {
107 1.3 oster /* "normal" rollaway */
108 1.14 oster rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109 1.14 oster allocList, &rf_xorFuncs, NULL);
110 1.1 oster }
111 1.1 oster
112 1.3 oster void
113 1.13 oster rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114 1.13 oster RF_DagHeader_t *dag_h, void *bp,
115 1.13 oster RF_RaidAccessFlags_t flags,
116 1.13 oster RF_AllocListElem_t *allocList)
117 1.1 oster {
118 1.3 oster /* "normal" rollaway */
119 1.14 oster rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120 1.14 oster allocList, 1, rf_RegularXorFunc, RF_TRUE);
121 1.1 oster }
122 1.1 oster
123 1.1 oster
124 1.1 oster /******************************************************************************
125 1.1 oster *
126 1.1 oster * DAG creation code begins here
127 1.1 oster */
128 1.1 oster
129 1.1 oster
130 1.1 oster /******************************************************************************
131 1.1 oster *
132 1.1 oster * creates a DAG to perform a large-write operation:
133 1.1 oster *
134 1.1 oster * / Rod \ / Wnd \
135 1.1 oster * H -- block- Rod - Xor - Cmt - Wnd --- T
136 1.1 oster * \ Rod / \ Wnp /
137 1.1 oster * \[Wnq]/
138 1.1 oster *
139 1.1 oster * The XOR node also does the Q calculation in the P+Q architecture.
140 1.1 oster * All nodes are before the commit node (Cmt) are assumed to be atomic and
141 1.1 oster * undoable - or - they make no changes to permanent state.
142 1.1 oster *
143 1.1 oster * Rod = read old data
144 1.1 oster * Cmt = commit node
145 1.1 oster * Wnp = write new parity
146 1.1 oster * Wnd = write new data
147 1.1 oster * Wnq = write new "q"
148 1.1 oster * [] denotes optional segments in the graph
149 1.1 oster *
150 1.1 oster * Parameters: raidPtr - description of the physical array
151 1.1 oster * asmap - logical & physical addresses for this access
152 1.1 oster * bp - buffer ptr (holds write data)
153 1.3 oster * flags - general flags (e.g. disk locking)
154 1.1 oster * allocList - list of memory allocated in DAG creation
155 1.1 oster * nfaults - number of faults array can tolerate
156 1.1 oster * (equal to # redundancy units in stripe)
157 1.1 oster * redfuncs - list of redundancy generating functions
158 1.1 oster *
159 1.1 oster *****************************************************************************/
160 1.1 oster
161 1.3 oster void
162 1.13 oster rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
163 1.13 oster RF_DagHeader_t *dag_h, void *bp,
164 1.13 oster RF_RaidAccessFlags_t flags,
165 1.13 oster RF_AllocListElem_t *allocList,
166 1.13 oster int nfaults, int (*redFunc) (RF_DagNode_t *),
167 1.13 oster int allowBufferRecycle)
168 1.1 oster {
169 1.22 oster RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
170 1.3 oster RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
171 1.3 oster int nWndNodes, nRodNodes, i, nodeNum, asmNum;
172 1.3 oster RF_AccessStripeMapHeader_t *new_asm_h[2];
173 1.3 oster RF_StripeNum_t parityStripeID;
174 1.3 oster char *sosBuffer, *eosBuffer;
175 1.3 oster RF_ReconUnitNum_t which_ru;
176 1.3 oster RF_RaidLayout_t *layoutPtr;
177 1.3 oster RF_PhysDiskAddr_t *pda;
178 1.3 oster
179 1.3 oster layoutPtr = &(raidPtr->Layout);
180 1.14 oster parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
181 1.14 oster asmap->raidAddress,
182 1.14 oster &which_ru);
183 1.3 oster
184 1.19 oster #if RF_DEBUG_DAG
185 1.3 oster if (rf_dagDebug) {
186 1.3 oster printf("[Creating large-write DAG]\n");
187 1.3 oster }
188 1.19 oster #endif
189 1.3 oster dag_h->creator = "LargeWriteDAG";
190 1.3 oster
191 1.3 oster dag_h->numCommitNodes = 1;
192 1.3 oster dag_h->numCommits = 0;
193 1.3 oster dag_h->numSuccedents = 1;
194 1.3 oster
195 1.3 oster /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */
196 1.3 oster nWndNodes = asmap->numStripeUnitsAccessed;
197 1.22 oster
198 1.22 oster for (i = 0; i < nWndNodes; i++) {
199 1.22 oster tmpNode = rf_AllocDAGNode();
200 1.22 oster tmpNode->list_next = dag_h->nodes;
201 1.22 oster dag_h->nodes = tmpNode;
202 1.22 oster }
203 1.22 oster wndNodes = dag_h->nodes;
204 1.22 oster
205 1.22 oster xorNode = rf_AllocDAGNode();
206 1.22 oster xorNode->list_next = dag_h->nodes;
207 1.22 oster dag_h->nodes = xorNode;
208 1.22 oster
209 1.22 oster wnpNode = rf_AllocDAGNode();
210 1.22 oster wnpNode->list_next = dag_h->nodes;
211 1.22 oster dag_h->nodes = wnpNode;
212 1.22 oster
213 1.22 oster blockNode = rf_AllocDAGNode();
214 1.22 oster blockNode->list_next = dag_h->nodes;
215 1.22 oster dag_h->nodes = blockNode;
216 1.22 oster
217 1.22 oster commitNode = rf_AllocDAGNode();
218 1.22 oster commitNode->list_next = dag_h->nodes;
219 1.22 oster dag_h->nodes = commitNode;
220 1.22 oster
221 1.22 oster termNode = rf_AllocDAGNode();
222 1.22 oster termNode->list_next = dag_h->nodes;
223 1.22 oster dag_h->nodes = termNode;
224 1.22 oster
225 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
226 1.3 oster if (nfaults == 2) {
227 1.22 oster wnqNode = rf_AllocDAGNode();
228 1.3 oster } else {
229 1.20 oster #endif
230 1.3 oster wnqNode = NULL;
231 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
232 1.3 oster }
233 1.20 oster #endif
234 1.14 oster rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
235 1.14 oster new_asm_h, &nRodNodes, &sosBuffer,
236 1.14 oster &eosBuffer, allocList);
237 1.3 oster if (nRodNodes > 0) {
238 1.22 oster for (i = 0; i < nRodNodes; i++) {
239 1.22 oster tmpNode = rf_AllocDAGNode();
240 1.22 oster tmpNode->list_next = dag_h->nodes;
241 1.22 oster dag_h->nodes = tmpNode;
242 1.22 oster }
243 1.22 oster rodNodes = dag_h->nodes;
244 1.3 oster } else {
245 1.3 oster rodNodes = NULL;
246 1.3 oster }
247 1.3 oster
248 1.3 oster /* begin node initialization */
249 1.3 oster if (nRodNodes > 0) {
250 1.14 oster rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
251 1.14 oster rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
252 1.14 oster dag_h, "Nil", allocList);
253 1.3 oster } else {
254 1.14 oster rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
255 1.14 oster rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
256 1.14 oster dag_h, "Nil", allocList);
257 1.3 oster }
258 1.3 oster
259 1.14 oster rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
260 1.14 oster rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
261 1.14 oster dag_h, "Cmt", allocList);
262 1.14 oster rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
263 1.14 oster rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
264 1.14 oster dag_h, "Trm", allocList);
265 1.3 oster
266 1.3 oster /* initialize the Rod nodes */
267 1.22 oster tmpNode = rodNodes;
268 1.3 oster for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
269 1.3 oster if (new_asm_h[asmNum]) {
270 1.3 oster pda = new_asm_h[asmNum]->stripeMap->physInfo;
271 1.3 oster while (pda) {
272 1.22 oster rf_InitNode(tmpNode, rf_wait,
273 1.14 oster RF_FALSE, rf_DiskReadFunc,
274 1.14 oster rf_DiskReadUndoFunc,
275 1.14 oster rf_GenericWakeupFunc,
276 1.14 oster 1, 1, 4, 0, dag_h,
277 1.14 oster "Rod", allocList);
278 1.22 oster tmpNode->params[0].p = pda;
279 1.22 oster tmpNode->params[1].p = pda->bufPtr;
280 1.22 oster tmpNode->params[2].v = parityStripeID;
281 1.22 oster tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
282 1.17 oster which_ru);
283 1.3 oster nodeNum++;
284 1.3 oster pda = pda->next;
285 1.22 oster tmpNode = tmpNode->list_next;
286 1.3 oster }
287 1.3 oster }
288 1.3 oster }
289 1.3 oster RF_ASSERT(nodeNum == nRodNodes);
290 1.3 oster
291 1.3 oster /* initialize the wnd nodes */
292 1.3 oster pda = asmap->physInfo;
293 1.22 oster tmpNode = wndNodes;
294 1.3 oster for (i = 0; i < nWndNodes; i++) {
295 1.22 oster rf_InitNode(tmpNode, rf_wait, RF_FALSE,
296 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
297 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0,
298 1.14 oster dag_h, "Wnd", allocList);
299 1.3 oster RF_ASSERT(pda != NULL);
300 1.22 oster tmpNode->params[0].p = pda;
301 1.22 oster tmpNode->params[1].p = pda->bufPtr;
302 1.22 oster tmpNode->params[2].v = parityStripeID;
303 1.22 oster tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
304 1.3 oster pda = pda->next;
305 1.22 oster tmpNode = tmpNode->list_next;
306 1.3 oster }
307 1.3 oster
308 1.3 oster /* initialize the redundancy node */
309 1.3 oster if (nRodNodes > 0) {
310 1.14 oster rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
311 1.14 oster rf_NullNodeUndoFunc, NULL, 1,
312 1.14 oster nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
313 1.14 oster nfaults, dag_h, "Xr ", allocList);
314 1.3 oster } else {
315 1.14 oster rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
316 1.14 oster rf_NullNodeUndoFunc, NULL, 1,
317 1.14 oster 1, 2 * (nWndNodes + nRodNodes) + 1,
318 1.14 oster nfaults, dag_h, "Xr ", allocList);
319 1.3 oster }
320 1.3 oster xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
321 1.22 oster tmpNode = wndNodes;
322 1.3 oster for (i = 0; i < nWndNodes; i++) {
323 1.14 oster /* pda */
324 1.22 oster xorNode->params[2 * i + 0] = tmpNode->params[0];
325 1.14 oster /* buf ptr */
326 1.22 oster xorNode->params[2 * i + 1] = tmpNode->params[1];
327 1.22 oster tmpNode = tmpNode->list_next;
328 1.3 oster }
329 1.22 oster tmpNode = rodNodes;
330 1.3 oster for (i = 0; i < nRodNodes; i++) {
331 1.14 oster /* pda */
332 1.22 oster xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
333 1.14 oster /* buf ptr */
334 1.22 oster xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
335 1.22 oster tmpNode = tmpNode->list_next;
336 1.3 oster }
337 1.3 oster /* xor node needs to get at RAID information */
338 1.3 oster xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
339 1.3 oster
340 1.3 oster /*
341 1.14 oster * Look for an Rod node that reads a complete SU. If none,
342 1.14 oster * alloc a buffer to receive the parity info. Note that we
343 1.14 oster * can't use a new data buffer because it will not have gotten
344 1.14 oster * written when the xor occurs. */
345 1.3 oster if (allowBufferRecycle) {
346 1.22 oster tmpNode = rodNodes;
347 1.3 oster for (i = 0; i < nRodNodes; i++) {
348 1.22 oster if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
349 1.3 oster break;
350 1.22 oster tmpNode = tmpNode->list_next;
351 1.3 oster }
352 1.3 oster }
353 1.3 oster if ((!allowBufferRecycle) || (i == nRodNodes)) {
354 1.27 oster xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
355 1.3 oster } else {
356 1.22 oster /* this works because the only way we get here is if
357 1.22 oster allowBufferRecycle is true and we went through the
358 1.22 oster above for loop, and exited via the break before
359 1.22 oster i==nRodNodes was true. That means tmpNode will
360 1.22 oster still point to a valid node -- the one we want for
361 1.22 oster here! */
362 1.22 oster xorNode->results[0] = tmpNode->params[1].p;
363 1.3 oster }
364 1.3 oster
365 1.3 oster /* initialize the Wnp node */
366 1.14 oster rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
367 1.14 oster rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
368 1.14 oster dag_h, "Wnp", allocList);
369 1.3 oster wnpNode->params[0].p = asmap->parityInfo;
370 1.3 oster wnpNode->params[1].p = xorNode->results[0];
371 1.3 oster wnpNode->params[2].v = parityStripeID;
372 1.17 oster wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
373 1.3 oster /* parityInfo must describe entire parity unit */
374 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
375 1.3 oster
376 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
377 1.3 oster if (nfaults == 2) {
378 1.3 oster /*
379 1.3 oster * We never try to recycle a buffer for the Q calcuation
380 1.3 oster * in addition to the parity. This would cause two buffers
381 1.3 oster * to get smashed during the P and Q calculation, guaranteeing
382 1.3 oster * one would be wrong.
383 1.3 oster */
384 1.12 oster RF_MallocAndAdd(xorNode->results[1],
385 1.12 oster rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
386 1.12 oster (void *), allocList);
387 1.14 oster rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
388 1.14 oster rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
389 1.14 oster 1, 1, 4, 0, dag_h, "Wnq", allocList);
390 1.3 oster wnqNode->params[0].p = asmap->qInfo;
391 1.3 oster wnqNode->params[1].p = xorNode->results[1];
392 1.3 oster wnqNode->params[2].v = parityStripeID;
393 1.17 oster wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
394 1.3 oster /* parityInfo must describe entire parity unit */
395 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
396 1.3 oster }
397 1.20 oster #endif
398 1.3 oster /*
399 1.3 oster * Connect nodes to form graph.
400 1.3 oster */
401 1.3 oster
402 1.3 oster /* connect dag header to block node */
403 1.3 oster RF_ASSERT(blockNode->numAntecedents == 0);
404 1.3 oster dag_h->succedents[0] = blockNode;
405 1.3 oster
406 1.3 oster if (nRodNodes > 0) {
407 1.3 oster /* connect the block node to the Rod nodes */
408 1.3 oster RF_ASSERT(blockNode->numSuccedents == nRodNodes);
409 1.3 oster RF_ASSERT(xorNode->numAntecedents == nRodNodes);
410 1.22 oster tmpNode = rodNodes;
411 1.3 oster for (i = 0; i < nRodNodes; i++) {
412 1.22 oster RF_ASSERT(tmpNode.numAntecedents == 1);
413 1.22 oster blockNode->succedents[i] = tmpNode;
414 1.22 oster tmpNode->antecedents[0] = blockNode;
415 1.22 oster tmpNode->antType[0] = rf_control;
416 1.3 oster
417 1.3 oster /* connect the Rod nodes to the Xor node */
418 1.22 oster RF_ASSERT(tmpNode.numSuccedents == 1);
419 1.22 oster tmpNode->succedents[0] = xorNode;
420 1.22 oster xorNode->antecedents[i] = tmpNode;
421 1.3 oster xorNode->antType[i] = rf_trueData;
422 1.22 oster tmpNode = tmpNode->list_next;
423 1.3 oster }
424 1.3 oster } else {
425 1.3 oster /* connect the block node to the Xor node */
426 1.3 oster RF_ASSERT(blockNode->numSuccedents == 1);
427 1.3 oster RF_ASSERT(xorNode->numAntecedents == 1);
428 1.3 oster blockNode->succedents[0] = xorNode;
429 1.3 oster xorNode->antecedents[0] = blockNode;
430 1.3 oster xorNode->antType[0] = rf_control;
431 1.3 oster }
432 1.3 oster
433 1.3 oster /* connect the xor node to the commit node */
434 1.3 oster RF_ASSERT(xorNode->numSuccedents == 1);
435 1.3 oster RF_ASSERT(commitNode->numAntecedents == 1);
436 1.3 oster xorNode->succedents[0] = commitNode;
437 1.3 oster commitNode->antecedents[0] = xorNode;
438 1.3 oster commitNode->antType[0] = rf_control;
439 1.3 oster
440 1.3 oster /* connect the commit node to the write nodes */
441 1.3 oster RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
442 1.22 oster tmpNode = wndNodes;
443 1.3 oster for (i = 0; i < nWndNodes; i++) {
444 1.3 oster RF_ASSERT(wndNodes->numAntecedents == 1);
445 1.22 oster commitNode->succedents[i] = tmpNode;
446 1.22 oster tmpNode->antecedents[0] = commitNode;
447 1.22 oster tmpNode->antType[0] = rf_control;
448 1.22 oster tmpNode = tmpNode->list_next;
449 1.3 oster }
450 1.3 oster RF_ASSERT(wnpNode->numAntecedents == 1);
451 1.3 oster commitNode->succedents[nWndNodes] = wnpNode;
452 1.3 oster wnpNode->antecedents[0] = commitNode;
453 1.3 oster wnpNode->antType[0] = rf_trueData;
454 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
455 1.3 oster if (nfaults == 2) {
456 1.3 oster RF_ASSERT(wnqNode->numAntecedents == 1);
457 1.3 oster commitNode->succedents[nWndNodes + 1] = wnqNode;
458 1.3 oster wnqNode->antecedents[0] = commitNode;
459 1.3 oster wnqNode->antType[0] = rf_trueData;
460 1.3 oster }
461 1.20 oster #endif
462 1.3 oster /* connect the write nodes to the term node */
463 1.3 oster RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
464 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
465 1.22 oster tmpNode = wndNodes;
466 1.3 oster for (i = 0; i < nWndNodes; i++) {
467 1.3 oster RF_ASSERT(wndNodes->numSuccedents == 1);
468 1.22 oster tmpNode->succedents[0] = termNode;
469 1.22 oster termNode->antecedents[i] = tmpNode;
470 1.3 oster termNode->antType[i] = rf_control;
471 1.22 oster tmpNode = tmpNode->list_next;
472 1.3 oster }
473 1.3 oster RF_ASSERT(wnpNode->numSuccedents == 1);
474 1.3 oster wnpNode->succedents[0] = termNode;
475 1.3 oster termNode->antecedents[nWndNodes] = wnpNode;
476 1.3 oster termNode->antType[nWndNodes] = rf_control;
477 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
478 1.3 oster if (nfaults == 2) {
479 1.3 oster RF_ASSERT(wnqNode->numSuccedents == 1);
480 1.3 oster wnqNode->succedents[0] = termNode;
481 1.3 oster termNode->antecedents[nWndNodes + 1] = wnqNode;
482 1.3 oster termNode->antType[nWndNodes + 1] = rf_control;
483 1.3 oster }
484 1.20 oster #endif
485 1.1 oster }
/******************************************************************************
 *
 * creates a DAG to perform a small-write operation (either raid 5 or pq),
 * which is as follows:
 *
 * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
 *            \- Rod X      /     \----> Wnd [Und]-/
 *           [\- Rod X     /       \---> Wnd [Und]-/]
 *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
 *
 * Rop = read old parity
 * Rod = read old data
 * Roq = read old "q"
 * Cmt = commit node
 * Und = unlock data disk
 * Unp = unlock parity disk
 * Unq = unlock q disk
 * Wnp = write new parity
 * Wnd = write new data
 * Wnq = write new "q"
 * [ ] denotes optional segments in the graph
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              bp        - buffer ptr (holds write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *              pfuncs    - list of parity generating functions
 *              qfuncs    - list of q generating functions
 *
 * A null qfuncs indicates single fault tolerant
 *****************************************************************************/
518 1.1 oster
519 1.3 oster void
520 1.13 oster rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
521 1.13 oster RF_DagHeader_t *dag_h, void *bp,
522 1.13 oster RF_RaidAccessFlags_t flags,
523 1.13 oster RF_AllocListElem_t *allocList,
524 1.13 oster const RF_RedFuncs_t *pfuncs,
525 1.13 oster const RF_RedFuncs_t *qfuncs)
526 1.1 oster {
527 1.3 oster RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
528 1.22 oster RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
529 1.22 oster RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
530 1.3 oster RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
531 1.22 oster RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
532 1.22 oster RF_DagNode_t *tmpwriteParityNode;
533 1.22 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
534 1.22 oster RF_DagNode_t *tmpwriteQNode;
535 1.22 oster #endif
536 1.16 oster int i, j, nNodes, totalNumNodes;
537 1.3 oster RF_ReconUnitNum_t which_ru;
538 1.3 oster int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
539 1.3 oster int (*qfunc) (RF_DagNode_t *);
540 1.3 oster int numDataNodes, numParityNodes;
541 1.3 oster RF_StripeNum_t parityStripeID;
542 1.3 oster RF_PhysDiskAddr_t *pda;
543 1.3 oster char *name, *qname;
544 1.3 oster long nfaults;
545 1.3 oster
546 1.3 oster nfaults = qfuncs ? 2 : 1;
547 1.3 oster
548 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
549 1.3 oster asmap->raidAddress, &which_ru);
550 1.3 oster pda = asmap->physInfo;
551 1.3 oster numDataNodes = asmap->numStripeUnitsAccessed;
552 1.3 oster numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
553 1.3 oster
554 1.19 oster #if RF_DEBUG_DAG
555 1.3 oster if (rf_dagDebug) {
556 1.3 oster printf("[Creating small-write DAG]\n");
557 1.3 oster }
558 1.19 oster #endif
559 1.3 oster RF_ASSERT(numDataNodes > 0);
560 1.3 oster dag_h->creator = "SmallWriteDAG";
561 1.3 oster
562 1.3 oster dag_h->numCommitNodes = 1;
563 1.3 oster dag_h->numCommits = 0;
564 1.3 oster dag_h->numSuccedents = 1;
565 1.3 oster
566 1.3 oster /*
567 1.3 oster * DAG creation occurs in four steps:
568 1.3 oster * 1. count the number of nodes in the DAG
569 1.3 oster * 2. create the nodes
570 1.3 oster * 3. initialize the nodes
571 1.3 oster * 4. connect the nodes
572 1.3 oster */
573 1.3 oster
574 1.3 oster /*
575 1.3 oster * Step 1. compute number of nodes in the graph
576 1.3 oster */
577 1.3 oster
578 1.14 oster /* number of nodes: a read and write for each data unit a
579 1.14 oster * redundancy computation node for each parity node (nfaults *
580 1.14 oster * nparity) a read and write for each parity unit a block and
581 1.14 oster * commit node (2) a terminate node if atomic RMW an unlock
582 1.14 oster * node for each data unit, redundancy unit */
583 1.3 oster totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
584 1.3 oster + (nfaults * 2 * numParityNodes) + 3;
585 1.3 oster /*
586 1.3 oster * Step 2. create the nodes
587 1.3 oster */
588 1.22 oster
589 1.22 oster blockNode = rf_AllocDAGNode();
590 1.22 oster blockNode->list_next = dag_h->nodes;
591 1.22 oster dag_h->nodes = blockNode;
592 1.22 oster
593 1.22 oster commitNode = rf_AllocDAGNode();
594 1.22 oster commitNode->list_next = dag_h->nodes;
595 1.22 oster dag_h->nodes = commitNode;
596 1.22 oster
597 1.22 oster for (i = 0; i < numDataNodes; i++) {
598 1.22 oster tmpNode = rf_AllocDAGNode();
599 1.22 oster tmpNode->list_next = dag_h->nodes;
600 1.22 oster dag_h->nodes = tmpNode;
601 1.22 oster }
602 1.22 oster readDataNodes = dag_h->nodes;
603 1.22 oster
604 1.22 oster for (i = 0; i < numParityNodes; i++) {
605 1.22 oster tmpNode = rf_AllocDAGNode();
606 1.22 oster tmpNode->list_next = dag_h->nodes;
607 1.22 oster dag_h->nodes = tmpNode;
608 1.22 oster }
609 1.22 oster readParityNodes = dag_h->nodes;
610 1.22 oster
611 1.22 oster for (i = 0; i < numDataNodes; i++) {
612 1.22 oster tmpNode = rf_AllocDAGNode();
613 1.22 oster tmpNode->list_next = dag_h->nodes;
614 1.22 oster dag_h->nodes = tmpNode;
615 1.22 oster }
616 1.22 oster writeDataNodes = dag_h->nodes;
617 1.22 oster
618 1.22 oster for (i = 0; i < numParityNodes; i++) {
619 1.22 oster tmpNode = rf_AllocDAGNode();
620 1.22 oster tmpNode->list_next = dag_h->nodes;
621 1.22 oster dag_h->nodes = tmpNode;
622 1.22 oster }
623 1.22 oster writeParityNodes = dag_h->nodes;
624 1.22 oster
625 1.22 oster for (i = 0; i < numParityNodes; i++) {
626 1.22 oster tmpNode = rf_AllocDAGNode();
627 1.22 oster tmpNode->list_next = dag_h->nodes;
628 1.22 oster dag_h->nodes = tmpNode;
629 1.22 oster }
630 1.22 oster xorNodes = dag_h->nodes;
631 1.22 oster
632 1.22 oster termNode = rf_AllocDAGNode();
633 1.22 oster termNode->list_next = dag_h->nodes;
634 1.22 oster dag_h->nodes = termNode;
635 1.16 oster
636 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
637 1.3 oster if (nfaults == 2) {
638 1.22 oster for (i = 0; i < numParityNodes; i++) {
639 1.22 oster tmpNode = rf_AllocDAGNode();
640 1.22 oster tmpNode->list_next = dag_h->nodes;
641 1.22 oster dag_h->nodes = tmpNode;
642 1.22 oster }
643 1.22 oster readQNodes = dag_h->nodes;
644 1.22 oster
645 1.22 oster for (i = 0; i < numParityNodes; i++) {
646 1.22 oster tmpNode = rf_AllocDAGNode();
647 1.22 oster tmpNode->list_next = dag_h->nodes;
648 1.22 oster dag_h->nodes = tmpNode;
649 1.22 oster }
650 1.22 oster writeQNodes = dag_h->nodes;
651 1.22 oster
652 1.22 oster for (i = 0; i < numParityNodes; i++) {
653 1.22 oster tmpNode = rf_AllocDAGNode();
654 1.22 oster tmpNode->list_next = dag_h->nodes;
655 1.22 oster dag_h->nodes = tmpNode;
656 1.22 oster }
657 1.22 oster qNodes = dag_h->nodes;
658 1.3 oster } else {
659 1.20 oster #endif
660 1.18 oster readQNodes = writeQNodes = qNodes = NULL;
661 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
662 1.3 oster }
663 1.20 oster #endif
664 1.3 oster
665 1.3 oster /*
666 1.3 oster * Step 3. initialize the nodes
667 1.3 oster */
668 1.3 oster /* initialize block node (Nil) */
669 1.3 oster nNodes = numDataNodes + (nfaults * numParityNodes);
670 1.14 oster rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
671 1.14 oster rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
672 1.14 oster dag_h, "Nil", allocList);
673 1.3 oster
674 1.3 oster /* initialize commit node (Cmt) */
675 1.14 oster rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
676 1.14 oster rf_NullNodeUndoFunc, NULL, nNodes,
677 1.14 oster (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
678 1.3 oster
679 1.3 oster /* initialize terminate node (Trm) */
680 1.14 oster rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
681 1.14 oster rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
682 1.14 oster dag_h, "Trm", allocList);
683 1.3 oster
684 1.3 oster /* initialize nodes which read old data (Rod) */
685 1.22 oster tmpreadDataNode = readDataNodes;
686 1.3 oster for (i = 0; i < numDataNodes; i++) {
687 1.22 oster rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
688 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
689 1.14 oster rf_GenericWakeupFunc, (nfaults * numParityNodes),
690 1.14 oster 1, 4, 0, dag_h, "Rod", allocList);
691 1.3 oster RF_ASSERT(pda != NULL);
692 1.3 oster /* physical disk addr desc */
693 1.22 oster tmpreadDataNode->params[0].p = pda;
694 1.3 oster /* buffer to hold old data */
695 1.27 oster tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
696 1.22 oster tmpreadDataNode->params[2].v = parityStripeID;
697 1.22 oster tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
698 1.17 oster which_ru);
699 1.3 oster pda = pda->next;
700 1.22 oster for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
701 1.22 oster tmpreadDataNode->propList[j] = NULL;
702 1.3 oster }
703 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
704 1.3 oster }
705 1.3 oster
706 1.3 oster /* initialize nodes which read old parity (Rop) */
707 1.3 oster pda = asmap->parityInfo;
708 1.3 oster i = 0;
709 1.22 oster tmpreadParityNode = readParityNodes;
710 1.3 oster for (i = 0; i < numParityNodes; i++) {
711 1.3 oster RF_ASSERT(pda != NULL);
712 1.22 oster rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
713 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
714 1.14 oster rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
715 1.14 oster dag_h, "Rop", allocList);
716 1.22 oster tmpreadParityNode->params[0].p = pda;
717 1.3 oster /* buffer to hold old parity */
718 1.27 oster tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
719 1.22 oster tmpreadParityNode->params[2].v = parityStripeID;
720 1.22 oster tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
721 1.17 oster which_ru);
722 1.3 oster pda = pda->next;
723 1.22 oster for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
724 1.22 oster tmpreadParityNode->propList[0] = NULL;
725 1.3 oster }
726 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
727 1.3 oster }
728 1.3 oster
729 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
730 1.3 oster /* initialize nodes which read old Q (Roq) */
731 1.3 oster if (nfaults == 2) {
732 1.3 oster pda = asmap->qInfo;
733 1.22 oster tmpreadQNode = readQNodes;
734 1.3 oster for (i = 0; i < numParityNodes; i++) {
735 1.3 oster RF_ASSERT(pda != NULL);
736 1.22 oster rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
737 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
738 1.14 oster rf_GenericWakeupFunc, numParityNodes,
739 1.14 oster 1, 4, 0, dag_h, "Roq", allocList);
740 1.22 oster tmpreadQNode->params[0].p = pda;
741 1.3 oster /* buffer to hold old Q */
742 1.24 oster tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
743 1.24 oster pda->numSector << raidPtr->logBytesPerSector);
744 1.22 oster tmpreadQNode->params[2].v = parityStripeID;
745 1.22 oster tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
746 1.17 oster which_ru);
747 1.3 oster pda = pda->next;
748 1.22 oster for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
749 1.22 oster tmpreadQNode->propList[0] = NULL;
750 1.3 oster }
751 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
752 1.3 oster }
753 1.3 oster }
754 1.20 oster #endif
755 1.3 oster /* initialize nodes which write new data (Wnd) */
756 1.3 oster pda = asmap->physInfo;
757 1.22 oster tmpwriteDataNode = writeDataNodes;
758 1.3 oster for (i = 0; i < numDataNodes; i++) {
759 1.3 oster RF_ASSERT(pda != NULL);
760 1.22 oster rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
761 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
762 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
763 1.14 oster "Wnd", allocList);
764 1.3 oster /* physical disk addr desc */
765 1.22 oster tmpwriteDataNode->params[0].p = pda;
766 1.3 oster /* buffer holding new data to be written */
767 1.22 oster tmpwriteDataNode->params[1].p = pda->bufPtr;
768 1.22 oster tmpwriteDataNode->params[2].v = parityStripeID;
769 1.22 oster tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
770 1.17 oster which_ru);
771 1.3 oster pda = pda->next;
772 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
773 1.3 oster }
774 1.3 oster
775 1.3 oster /*
776 1.3 oster * Initialize nodes which compute new parity and Q.
777 1.3 oster */
778 1.3 oster /*
779 1.3 oster * We use the simple XOR func in the double-XOR case, and when
780 1.14 oster * we're accessing only a portion of one stripe unit. The
781 1.14 oster * distinction between the two is that the regular XOR func
782 1.14 oster * assumes that the targbuf is a full SU in size, and examines
783 1.14 oster * the pda associated with the buffer to decide where within
784 1.14 oster * the buffer to XOR the data, whereas the simple XOR func
785 1.14 oster * just XORs the data into the start of the buffer. */
786 1.3 oster if ((numParityNodes == 2) || ((numDataNodes == 1)
787 1.14 oster && (asmap->totalSectorsAccessed <
788 1.14 oster raidPtr->Layout.sectorsPerStripeUnit))) {
789 1.3 oster func = pfuncs->simple;
790 1.3 oster undoFunc = rf_NullNodeUndoFunc;
791 1.3 oster name = pfuncs->SimpleName;
792 1.3 oster if (qfuncs) {
793 1.3 oster qfunc = qfuncs->simple;
794 1.3 oster qname = qfuncs->SimpleName;
795 1.3 oster } else {
796 1.3 oster qfunc = NULL;
797 1.3 oster qname = NULL;
798 1.3 oster }
799 1.3 oster } else {
800 1.3 oster func = pfuncs->regular;
801 1.3 oster undoFunc = rf_NullNodeUndoFunc;
802 1.3 oster name = pfuncs->RegularName;
803 1.3 oster if (qfuncs) {
804 1.3 oster qfunc = qfuncs->regular;
805 1.3 oster qname = qfuncs->RegularName;
806 1.3 oster } else {
807 1.3 oster qfunc = NULL;
808 1.3 oster qname = NULL;
809 1.3 oster }
810 1.3 oster }
811 1.3 oster /*
812 1.3 oster * Initialize the xor nodes: params are {pda,buf}
813 1.3 oster * from {Rod,Wnd,Rop} nodes, and raidPtr
814 1.3 oster */
815 1.3 oster if (numParityNodes == 2) {
816 1.3 oster /* double-xor case */
817 1.22 oster tmpxorNode = xorNodes;
818 1.22 oster tmpreadDataNode = readDataNodes;
819 1.22 oster tmpreadParityNode = readParityNodes;
820 1.22 oster tmpwriteDataNode = writeDataNodes;
821 1.22 oster tmpqNode = qNodes;
822 1.22 oster tmpreadQNode = readQNodes;
823 1.3 oster for (i = 0; i < numParityNodes; i++) {
824 1.3 oster /* note: no wakeup func for xor */
825 1.22 oster rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
826 1.14 oster undoFunc, NULL, 1,
827 1.14 oster (numDataNodes + numParityNodes),
828 1.14 oster 7, 1, dag_h, name, allocList);
829 1.22 oster tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
830 1.22 oster tmpxorNode->params[0] = tmpreadDataNode->params[0];
831 1.22 oster tmpxorNode->params[1] = tmpreadDataNode->params[1];
832 1.22 oster tmpxorNode->params[2] = tmpreadParityNode->params[0];
833 1.22 oster tmpxorNode->params[3] = tmpreadParityNode->params[1];
834 1.22 oster tmpxorNode->params[4] = tmpwriteDataNode->params[0];
835 1.22 oster tmpxorNode->params[5] = tmpwriteDataNode->params[1];
836 1.22 oster tmpxorNode->params[6].p = raidPtr;
837 1.3 oster /* use old parity buf as target buf */
838 1.22 oster tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
839 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
840 1.3 oster if (nfaults == 2) {
841 1.3 oster /* note: no wakeup func for qor */
842 1.22 oster rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
843 1.14 oster qfunc, undoFunc, NULL, 1,
844 1.14 oster (numDataNodes + numParityNodes),
845 1.14 oster 7, 1, dag_h, qname, allocList);
846 1.22 oster tmpqNode->params[0] = tmpreadDataNode->params[0];
847 1.22 oster tmpqNode->params[1] = tmpreadDataNode->params[1];
848 1.22 oster tmpqNode->params[2] = tmpreadQNode->.params[0];
849 1.22 oster tmpqNode->params[3] = tmpreadQNode->params[1];
850 1.22 oster tmpqNode->params[4] = tmpwriteDataNode->params[0];
851 1.22 oster tmpqNode->params[5] = tmpwriteDataNode->params[1];
852 1.22 oster tmpqNode->params[6].p = raidPtr;
853 1.3 oster /* use old Q buf as target buf */
854 1.22 oster tmpqNode->results[0] = tmpreadQNode->params[1].p;
855 1.22 oster tmpqNode = tmpqNode->list_next;
856 1.22 oster tmpreadQNodes = tmpreadQNodes->list_next;
857 1.3 oster }
858 1.20 oster #endif
859 1.22 oster tmpxorNode = tmpxorNode->list_next;
860 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
861 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
862 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
863 1.3 oster }
864 1.3 oster } else {
865 1.3 oster /* there is only one xor node in this case */
866 1.22 oster rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
867 1.14 oster undoFunc, NULL, 1, (numDataNodes + numParityNodes),
868 1.14 oster (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
869 1.14 oster dag_h, name, allocList);
870 1.22 oster xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
871 1.22 oster tmpreadDataNode = readDataNodes;
872 1.22 oster for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
873 1.22 oster out the "+1" into the "deal with Rop separately below */
874 1.22 oster /* set up params related to Rod nodes */
875 1.22 oster xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
876 1.22 oster xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
877 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
878 1.22 oster }
879 1.22 oster /* deal with Rop separately */
880 1.22 oster xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0]; /* pda */
881 1.22 oster xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1]; /* buffer ptr */
882 1.22 oster
883 1.22 oster tmpwriteDataNode = writeDataNodes;
884 1.3 oster for (i = 0; i < numDataNodes; i++) {
885 1.3 oster /* set up params related to Wnd and Wnp nodes */
886 1.22 oster xorNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
887 1.22 oster tmpwriteDataNode->params[0];
888 1.22 oster xorNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
889 1.22 oster tmpwriteDataNode->params[1];
890 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
891 1.3 oster }
892 1.3 oster /* xor node needs to get at RAID information */
893 1.22 oster xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
894 1.22 oster xorNodes->results[0] = readParityNodes->params[1].p;
895 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
896 1.3 oster if (nfaults == 2) {
897 1.22 oster rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
898 1.14 oster undoFunc, NULL, 1,
899 1.14 oster (numDataNodes + numParityNodes),
900 1.14 oster (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
901 1.14 oster dag_h, qname, allocList);
902 1.22 oster tmpreadDataNode = readDataNodes;
903 1.3 oster for (i = 0; i < numDataNodes; i++) {
904 1.3 oster /* set up params related to Rod */
905 1.22 oster qNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
906 1.22 oster qNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
907 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
908 1.3 oster }
909 1.3 oster /* and read old q */
910 1.22 oster qNodes->params[2 * numDataNodes + 0] = /* pda */
911 1.22 oster readQNodes->params[0];
912 1.22 oster qNodes->params[2 * numDataNodes + 1] = /* buffer ptr */
913 1.22 oster readQNodes->params[1];
914 1.22 oster tmpwriteDataNode = writeDataNodes;
915 1.3 oster for (i = 0; i < numDataNodes; i++) {
916 1.3 oster /* set up params related to Wnd nodes */
917 1.22 oster qNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
918 1.22 oster tmpwriteDataNode->params[0];
919 1.22 oster qNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
920 1.22 oster tmpwriteDataNode->params[1];
921 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
922 1.3 oster }
923 1.3 oster /* xor node needs to get at RAID information */
924 1.22 oster qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
925 1.22 oster qNodes->results[0] = readQNodes->params[1].p;
926 1.3 oster }
927 1.20 oster #endif
928 1.3 oster }
929 1.3 oster
930 1.3 oster /* initialize nodes which write new parity (Wnp) */
931 1.3 oster pda = asmap->parityInfo;
932 1.22 oster tmpwriteParityNode = writeParityNodes;
933 1.22 oster tmpxorNode = xorNodes;
934 1.3 oster for (i = 0; i < numParityNodes; i++) {
935 1.22 oster rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
936 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
937 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
938 1.14 oster "Wnp", allocList);
939 1.3 oster RF_ASSERT(pda != NULL);
940 1.22 oster tmpwriteParityNode->params[0].p = pda; /* param 1 (bufPtr)
941 1.22 oster * filled in by xor node */
942 1.22 oster tmpwriteParityNode->params[1].p = tmpxorNode->results[0]; /* buffer pointer for
943 1.22 oster * parity write
944 1.22 oster * operation */
945 1.22 oster tmpwriteParityNode->params[2].v = parityStripeID;
946 1.22 oster tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
947 1.17 oster which_ru);
948 1.3 oster pda = pda->next;
949 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
950 1.22 oster tmpxorNode = tmpxorNode->list_next;
951 1.3 oster }
952 1.3 oster
953 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
954 1.3 oster /* initialize nodes which write new Q (Wnq) */
955 1.3 oster if (nfaults == 2) {
956 1.3 oster pda = asmap->qInfo;
957 1.22 oster tmpwriteQNode = writeQNodes;
958 1.22 oster tmpqNode = qNodes;
959 1.3 oster for (i = 0; i < numParityNodes; i++) {
960 1.22 oster rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
961 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
962 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
963 1.14 oster "Wnq", allocList);
964 1.3 oster RF_ASSERT(pda != NULL);
965 1.22 oster tmpwriteQNode->params[0].p = pda; /* param 1 (bufPtr)
966 1.3 oster * filled in by xor node */
967 1.22 oster tmpwriteQNode->params[1].p = tmpqNode->results[0]; /* buffer pointer for
968 1.3 oster * parity write
969 1.3 oster * operation */
970 1.22 oster tmpwriteQNode->params[2].v = parityStripeID;
971 1.22 oster tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
972 1.17 oster which_ru);
973 1.3 oster pda = pda->next;
974 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
975 1.22 oster tmpqNode = tmpqNode->list_next;
976 1.3 oster }
977 1.3 oster }
978 1.20 oster #endif
979 1.3 oster /*
980 1.3 oster * Step 4. connect the nodes.
981 1.3 oster */
982 1.3 oster
983 1.3 oster /* connect header to block node */
984 1.3 oster dag_h->succedents[0] = blockNode;
985 1.3 oster
986 1.3 oster /* connect block node to read old data nodes */
987 1.3 oster RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
988 1.22 oster tmpreadDataNode = readDataNodes;
989 1.3 oster for (i = 0; i < numDataNodes; i++) {
990 1.22 oster blockNode->succedents[i] = tmpreadDataNode;
991 1.22 oster RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
992 1.22 oster tmpreadDataNode->antecedents[0] = blockNode;
993 1.22 oster tmpreadDataNode->antType[0] = rf_control;
994 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
995 1.3 oster }
996 1.3 oster
997 1.3 oster /* connect block node to read old parity nodes */
998 1.22 oster tmpreadParityNode = readParityNodes;
999 1.3 oster for (i = 0; i < numParityNodes; i++) {
1000 1.22 oster blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1001 1.22 oster RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1002 1.22 oster tmpreadParityNode->antecedents[0] = blockNode;
1003 1.22 oster tmpreadParityNode->antType[0] = rf_control;
1004 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1005 1.3 oster }
1006 1.3 oster
1007 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1008 1.3 oster /* connect block node to read old Q nodes */
1009 1.3 oster if (nfaults == 2) {
1010 1.22 oster tmpreadQNode = readQNodes;
1011 1.3 oster for (i = 0; i < numParityNodes; i++) {
1012 1.22 oster blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1013 1.22 oster RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1014 1.22 oster tmpreadQNode->antecedents[0] = blockNode;
1015 1.22 oster tmpreadQNode->antType[0] = rf_control;
1016 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
1017 1.3 oster }
1018 1.3 oster }
1019 1.20 oster #endif
1020 1.3 oster /* connect read old data nodes to xor nodes */
1021 1.22 oster tmpreadDataNode = readDataNodes;
1022 1.3 oster for (i = 0; i < numDataNodes; i++) {
1023 1.22 oster RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1024 1.22 oster tmpxorNode = xorNodes;
1025 1.3 oster for (j = 0; j < numParityNodes; j++) {
1026 1.22 oster RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1027 1.22 oster tmpreadDataNode->succedents[j] = tmpxorNode;
1028 1.22 oster tmpxorNode->antecedents[i] = tmpreadDataNode;
1029 1.22 oster tmpxorNode->antType[i] = rf_trueData;
1030 1.22 oster tmpxorNode = tmpxorNode->list_next;
1031 1.3 oster }
1032 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1033 1.3 oster }
1034 1.3 oster
1035 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1036 1.3 oster /* connect read old data nodes to q nodes */
1037 1.3 oster if (nfaults == 2) {
1038 1.22 oster tmpreadDataNode = readDataNodes;
1039 1.3 oster for (i = 0; i < numDataNodes; i++) {
1040 1.22 oster tmpqNode = qNodes;
1041 1.3 oster for (j = 0; j < numParityNodes; j++) {
1042 1.22 oster RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1043 1.22 oster tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1044 1.22 oster tmpqNode->antecedents[i] = tmpreadDataNode;
1045 1.22 oster tmpqNode->antType[i] = rf_trueData;
1046 1.22 oster tmpqNode = tmpqNode->list_next;
1047 1.3 oster }
1048 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1049 1.3 oster }
1050 1.3 oster }
1051 1.20 oster #endif
1052 1.3 oster /* connect read old parity nodes to xor nodes */
1053 1.22 oster tmpreadParityNode = readParityNodes;
1054 1.3 oster for (i = 0; i < numParityNodes; i++) {
1055 1.22 oster RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1056 1.22 oster tmpxorNode = xorNodes;
1057 1.3 oster for (j = 0; j < numParityNodes; j++) {
1058 1.22 oster tmpreadParityNode->succedents[j] = tmpxorNode;
1059 1.22 oster tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1060 1.22 oster tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1061 1.22 oster tmpxorNode = tmpxorNode->list_next;
1062 1.3 oster }
1063 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1064 1.3 oster }
1065 1.3 oster
1066 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1067 1.3 oster /* connect read old q nodes to q nodes */
1068 1.3 oster if (nfaults == 2) {
1069 1.22 oster tmpreadParityNode = readParityNodes;
1070 1.22 oster tmpreadQNode = readQNodes;
1071 1.3 oster for (i = 0; i < numParityNodes; i++) {
1072 1.22 oster RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1073 1.22 oster tmpqNode = qNodes;
1074 1.3 oster for (j = 0; j < numParityNodes; j++) {
1075 1.22 oster tmpreadQNode->succedents[j] = tmpqNode;
1076 1.22 oster tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1077 1.22 oster tmpqNode->antType[numDataNodes + i] = rf_trueData;
1078 1.22 oster tmpqNode = tmpqNode->list_next;
1079 1.3 oster }
1080 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1081 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
1082 1.3 oster }
1083 1.3 oster }
1084 1.20 oster #endif
1085 1.3 oster /* connect xor nodes to commit node */
1086 1.3 oster RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1087 1.22 oster tmpxorNode = xorNodes;
1088 1.3 oster for (i = 0; i < numParityNodes; i++) {
1089 1.22 oster RF_ASSERT(tmpxorNode->numSuccedents == 1);
1090 1.22 oster tmpxorNode->succedents[0] = commitNode;
1091 1.22 oster commitNode->antecedents[i] = tmpxorNode;
1092 1.3 oster commitNode->antType[i] = rf_control;
1093 1.22 oster tmpxorNode = tmpxorNode->list_next;
1094 1.3 oster }
1095 1.3 oster
1096 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1097 1.3 oster /* connect q nodes to commit node */
1098 1.3 oster if (nfaults == 2) {
1099 1.22 oster tmpqNode = qNodes;
1100 1.3 oster for (i = 0; i < numParityNodes; i++) {
1101 1.22 oster RF_ASSERT(tmpqNode->numSuccedents == 1);
1102 1.22 oster tmpqNode->succedents[0] = commitNode;
1103 1.22 oster commitNode->antecedents[i + numParityNodes] = tmpqNode;
1104 1.3 oster commitNode->antType[i + numParityNodes] = rf_control;
1105 1.22 oster tmpqNode = tmpqNode->list_next;
1106 1.3 oster }
1107 1.3 oster }
1108 1.20 oster #endif
1109 1.3 oster /* connect commit node to write nodes */
1110 1.3 oster RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1111 1.22 oster tmpwriteDataNode = writeDataNodes;
1112 1.3 oster for (i = 0; i < numDataNodes; i++) {
1113 1.22 oster RF_ASSERT(tmpwriteDataNodes->numAntecedents == 1);
1114 1.22 oster commitNode->succedents[i] = tmpwriteDataNode;
1115 1.22 oster tmpwriteDataNode->antecedents[0] = commitNode;
1116 1.22 oster tmpwriteDataNode->antType[0] = rf_trueData;
1117 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
1118 1.3 oster }
1119 1.22 oster tmpwriteParityNode = writeParityNodes;
1120 1.3 oster for (i = 0; i < numParityNodes; i++) {
1121 1.22 oster RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1122 1.22 oster commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1123 1.22 oster tmpwriteParityNode->antecedents[0] = commitNode;
1124 1.22 oster tmpwriteParityNode->antType[0] = rf_trueData;
1125 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
1126 1.3 oster }
1127 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1128 1.3 oster if (nfaults == 2) {
1129 1.22 oster tmpwriteQNode = writeQNodes;
1130 1.3 oster for (i = 0; i < numParityNodes; i++) {
1131 1.22 oster RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1132 1.22 oster commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1133 1.22 oster tmpwriteQNode->antecedents[0] = commitNode;
1134 1.22 oster tmpwriteQNode->antType[0] = rf_trueData;
1135 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
1136 1.3 oster }
1137 1.3 oster }
1138 1.20 oster #endif
1139 1.3 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1140 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1141 1.22 oster tmpwriteDataNode = writeDataNodes;
1142 1.3 oster for (i = 0; i < numDataNodes; i++) {
1143 1.16 oster /* connect write new data nodes to term node */
1144 1.22 oster RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1145 1.16 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1146 1.22 oster tmpwriteDataNode->succedents[0] = termNode;
1147 1.22 oster termNode->antecedents[i] = tmpwriteDataNode;
1148 1.16 oster termNode->antType[i] = rf_control;
1149 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
1150 1.3 oster }
1151 1.3 oster
1152 1.22 oster tmpwriteParityNode = writeParityNodes;
1153 1.3 oster for (i = 0; i < numParityNodes; i++) {
1154 1.22 oster RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1155 1.22 oster tmpwriteParityNode->succedents[0] = termNode;
1156 1.22 oster termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1157 1.16 oster termNode->antType[numDataNodes + i] = rf_control;
1158 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
1159 1.3 oster }
1160 1.3 oster
1161 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1162 1.3 oster if (nfaults == 2) {
1163 1.22 oster tmpwriteQNode = writeQNodes;
1164 1.3 oster for (i = 0; i < numParityNodes; i++) {
1165 1.22 oster RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1166 1.22 oster tmpwriteQNode->succedents[0] = termNode;
1167 1.22 oster termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1168 1.16 oster termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1169 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
1170 1.3 oster }
1171 1.3 oster }
1172 1.20 oster #endif
1173 1.1 oster }
1174 1.1 oster
1175 1.1 oster
1176 1.1 oster /******************************************************************************
1177 1.1 oster * create a write graph (fault-free or degraded) for RAID level 1
1178 1.1 oster *
1179 1.1 oster * Hdr -> Commit -> Wpd -> Nil -> Trm
1180 1.1 oster * -> Wsd ->
1181 1.1 oster *
1182 1.1 oster * The "Wpd" node writes data to the primary copy in the mirror pair
1183 1.1 oster * The "Wsd" node writes data to the secondary copy in the mirror pair
1184 1.1 oster *
1185 1.1 oster * Parameters: raidPtr - description of the physical array
1186 1.1 oster * asmap - logical & physical addresses for this access
1187 1.1 oster * bp - buffer ptr (holds write data)
1188 1.3 oster * flags - general flags (e.g. disk locking)
1189 1.1 oster * allocList - list of memory allocated in DAG creation
1190 1.1 oster *****************************************************************************/
1191 1.1 oster
1192 1.3 oster void
1193 1.13 oster rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1194 1.13 oster RF_DagHeader_t *dag_h, void *bp,
1195 1.13 oster RF_RaidAccessFlags_t flags,
1196 1.13 oster RF_AllocListElem_t *allocList)
1197 1.1 oster {
1198 1.3 oster RF_DagNode_t *unblockNode, *termNode, *commitNode;
1199 1.22 oster RF_DagNode_t *wndNode, *wmirNode;
1200 1.22 oster RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1201 1.3 oster int nWndNodes, nWmirNodes, i;
1202 1.3 oster RF_ReconUnitNum_t which_ru;
1203 1.3 oster RF_PhysDiskAddr_t *pda, *pdaP;
1204 1.3 oster RF_StripeNum_t parityStripeID;
1205 1.3 oster
1206 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1207 1.3 oster asmap->raidAddress, &which_ru);
1208 1.19 oster #if RF_DEBUG_DAG
1209 1.3 oster if (rf_dagDebug) {
1210 1.3 oster printf("[Creating RAID level 1 write DAG]\n");
1211 1.3 oster }
1212 1.19 oster #endif
1213 1.3 oster dag_h->creator = "RaidOneWriteDAG";
1214 1.3 oster
1215 1.3 oster /* 2 implies access not SU aligned */
1216 1.3 oster nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1217 1.3 oster nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1218 1.3 oster
1219 1.3 oster /* alloc the Wnd nodes and the Wmir node */
1220 1.3 oster if (asmap->numDataFailed == 1)
1221 1.3 oster nWndNodes--;
1222 1.3 oster if (asmap->numParityFailed == 1)
1223 1.3 oster nWmirNodes--;
1224 1.3 oster
1225 1.3 oster /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1226 1.3 oster * + terminator) */
1227 1.22 oster for (i = 0; i < nWndNodes; i++) {
1228 1.22 oster tmpNode = rf_AllocDAGNode();
1229 1.22 oster tmpNode->list_next = dag_h->nodes;
1230 1.22 oster dag_h->nodes = tmpNode;
1231 1.22 oster }
1232 1.22 oster wndNode = dag_h->nodes;
1233 1.22 oster
1234 1.22 oster for (i = 0; i < nWmirNodes; i++) {
1235 1.22 oster tmpNode = rf_AllocDAGNode();
1236 1.22 oster tmpNode->list_next = dag_h->nodes;
1237 1.22 oster dag_h->nodes = tmpNode;
1238 1.22 oster }
1239 1.22 oster wmirNode = dag_h->nodes;
1240 1.22 oster
1241 1.22 oster commitNode = rf_AllocDAGNode();
1242 1.22 oster commitNode->list_next = dag_h->nodes;
1243 1.22 oster dag_h->nodes = commitNode;
1244 1.22 oster
1245 1.22 oster unblockNode = rf_AllocDAGNode();
1246 1.22 oster unblockNode->list_next = dag_h->nodes;
1247 1.22 oster dag_h->nodes = unblockNode;
1248 1.22 oster
1249 1.22 oster termNode = rf_AllocDAGNode();
1250 1.22 oster termNode->list_next = dag_h->nodes;
1251 1.22 oster dag_h->nodes = termNode;
1252 1.3 oster
1253 1.3 oster /* this dag can commit immediately */
1254 1.3 oster dag_h->numCommitNodes = 1;
1255 1.3 oster dag_h->numCommits = 0;
1256 1.3 oster dag_h->numSuccedents = 1;
1257 1.3 oster
1258 1.3 oster /* initialize the commit, unblock, and term nodes */
1259 1.14 oster rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1260 1.14 oster rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1261 1.14 oster 0, 0, 0, dag_h, "Cmt", allocList);
1262 1.14 oster rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1263 1.14 oster rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1264 1.14 oster 0, 0, dag_h, "Nil", allocList);
1265 1.14 oster rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1266 1.14 oster rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1267 1.14 oster dag_h, "Trm", allocList);
1268 1.3 oster
1269 1.3 oster /* initialize the wnd nodes */
1270 1.3 oster if (nWndNodes > 0) {
1271 1.3 oster pda = asmap->physInfo;
1272 1.22 oster tmpwndNode = wndNode;
1273 1.3 oster for (i = 0; i < nWndNodes; i++) {
1274 1.22 oster rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1275 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1276 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0,
1277 1.14 oster dag_h, "Wpd", allocList);
1278 1.3 oster RF_ASSERT(pda != NULL);
1279 1.22 oster tmpwndNode->params[0].p = pda;
1280 1.22 oster tmpwndNode->params[1].p = pda->bufPtr;
1281 1.22 oster tmpwndNode->params[2].v = parityStripeID;
1282 1.22 oster tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1283 1.3 oster pda = pda->next;
1284 1.22 oster tmpwndNode = tmpwndNode->list_next;
1285 1.3 oster }
1286 1.3 oster RF_ASSERT(pda == NULL);
1287 1.3 oster }
1288 1.3 oster /* initialize the mirror nodes */
1289 1.3 oster if (nWmirNodes > 0) {
1290 1.3 oster pda = asmap->physInfo;
1291 1.3 oster pdaP = asmap->parityInfo;
1292 1.22 oster tmpwmirNode = wmirNode;
1293 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1294 1.22 oster rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1295 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1296 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0,
1297 1.14 oster dag_h, "Wsd", allocList);
1298 1.3 oster RF_ASSERT(pda != NULL);
1299 1.22 oster tmpwmirNode->params[0].p = pdaP;
1300 1.22 oster tmpwmirNode->params[1].p = pda->bufPtr;
1301 1.22 oster tmpwmirNode->params[2].v = parityStripeID;
1302 1.22 oster tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1303 1.3 oster pda = pda->next;
1304 1.3 oster pdaP = pdaP->next;
1305 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1306 1.3 oster }
1307 1.3 oster RF_ASSERT(pda == NULL);
1308 1.3 oster RF_ASSERT(pdaP == NULL);
1309 1.3 oster }
1310 1.3 oster /* link the header node to the commit node */
1311 1.3 oster RF_ASSERT(dag_h->numSuccedents == 1);
1312 1.3 oster RF_ASSERT(commitNode->numAntecedents == 0);
1313 1.3 oster dag_h->succedents[0] = commitNode;
1314 1.3 oster
1315 1.3 oster /* link the commit node to the write nodes */
1316 1.3 oster RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1317 1.22 oster tmpwndNode = wndNode;
1318 1.3 oster for (i = 0; i < nWndNodes; i++) {
1319 1.22 oster RF_ASSERT(tmpwndNode->numAntecedents == 1);
1320 1.22 oster commitNode->succedents[i] = tmpwndNode;
1321 1.22 oster tmpwndNode->antecedents[0] = commitNode;
1322 1.22 oster tmpwndNode->antType[0] = rf_control;
1323 1.22 oster tmpwndNode = tmpwndNode->list_next;
1324 1.3 oster }
1325 1.22 oster tmpwmirNode = wmirNode;
1326 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1327 1.22 oster RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1328 1.22 oster commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1329 1.22 oster tmpwmirNode->antecedents[0] = commitNode;
1330 1.22 oster tmpwmirNode->antType[0] = rf_control;
1331 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1332 1.3 oster }
1333 1.3 oster
1334 1.3 oster /* link the write nodes to the unblock node */
1335 1.3 oster RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1336 1.22 oster tmpwndNode = wndNode;
1337 1.3 oster for (i = 0; i < nWndNodes; i++) {
1338 1.22 oster RF_ASSERT(tmpwndNode->numSuccedents == 1);
1339 1.22 oster tmpwndNode->succedents[0] = unblockNode;
1340 1.22 oster unblockNode->antecedents[i] = tmpwndNode;
1341 1.3 oster unblockNode->antType[i] = rf_control;
1342 1.22 oster tmpwndNode = tmpwndNode->list_next;
1343 1.3 oster }
1344 1.22 oster tmpwmirNode = wmirNode;
1345 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1346 1.22 oster RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1347 1.22 oster tmpwmirNode->succedents[0] = unblockNode;
1348 1.22 oster unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1349 1.3 oster unblockNode->antType[i + nWndNodes] = rf_control;
1350 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1351 1.3 oster }
1352 1.3 oster
1353 1.3 oster /* link the unblock node to the term node */
1354 1.3 oster RF_ASSERT(unblockNode->numSuccedents == 1);
1355 1.3 oster RF_ASSERT(termNode->numAntecedents == 1);
1356 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1357 1.3 oster unblockNode->succedents[0] = termNode;
1358 1.3 oster termNode->antecedents[0] = unblockNode;
1359 1.3 oster termNode->antType[0] = rf_control;
1360 1.1 oster }
1361