rf_dagffwr.c revision 1.22 1 1.22 oster /* $NetBSD: rf_dagffwr.c,v 1.22 2004/03/18 16:40:05 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*
30 1.1 oster * rf_dagffwr.c
31 1.1 oster *
32 1.1 oster * code for creating fault-free DAGs
33 1.1 oster *
34 1.1 oster */
35 1.7 lukem
36 1.7 lukem #include <sys/cdefs.h>
37 1.22 oster __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.22 2004/03/18 16:40:05 oster Exp $");
38 1.1 oster
39 1.6 oster #include <dev/raidframe/raidframevar.h>
40 1.6 oster
41 1.1 oster #include "rf_raid.h"
42 1.1 oster #include "rf_dag.h"
43 1.1 oster #include "rf_dagutils.h"
44 1.1 oster #include "rf_dagfuncs.h"
45 1.1 oster #include "rf_debugMem.h"
46 1.1 oster #include "rf_dagffrd.h"
47 1.1 oster #include "rf_general.h"
48 1.1 oster #include "rf_dagffwr.h"
49 1.1 oster
50 1.1 oster /******************************************************************************
51 1.1 oster *
52 1.1 oster * General comments on DAG creation:
53 1.3 oster *
54 1.1 oster * All DAGs in this file use roll-away error recovery. Each DAG has a single
55 1.1 oster * commit node, usually called "Cmt." If an error occurs before the Cmt node
56 1.1 oster * is reached, the execution engine will halt forward execution and work
57 1.1 oster * backward through the graph, executing the undo functions. Assuming that
58 1.1 oster * each node in the graph prior to the Cmt node is undoable and atomic - or -
59 1.1 oster * does not make changes to permanent state, the graph will fail atomically.
60 1.1 oster * If an error occurs after the Cmt node executes, the engine will roll-forward
61 1.1 oster * through the graph, blindly executing nodes until it reaches the end.
62 1.1 oster * If a graph reaches the end, it is assumed to have completed successfully.
63 1.1 oster *
64 1.1 oster * A graph has only 1 Cmt node.
65 1.1 oster *
66 1.1 oster */
67 1.1 oster
68 1.1 oster
69 1.1 oster /******************************************************************************
70 1.1 oster *
71 1.1 oster * The following wrappers map the standard DAG creation interface to the
72 1.1 oster * DAG creation routines. Additionally, these wrappers enable experimentation
73 1.1 oster * with new DAG structures by providing an extra level of indirection, allowing
74 1.1 oster * the DAG creation routines to be replaced at this single point.
75 1.1 oster */
76 1.1 oster
77 1.1 oster
78 1.3 oster void
79 1.13 oster rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
80 1.13 oster RF_DagHeader_t *dag_h, void *bp,
81 1.13 oster RF_RaidAccessFlags_t flags,
82 1.13 oster RF_AllocListElem_t *allocList,
83 1.13 oster RF_IoType_t type)
84 1.1 oster {
85 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
86 1.14 oster RF_IO_TYPE_WRITE);
87 1.1 oster }
88 1.1 oster
89 1.3 oster void
90 1.13 oster rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
91 1.13 oster RF_DagHeader_t *dag_h, void *bp,
92 1.13 oster RF_RaidAccessFlags_t flags,
93 1.13 oster RF_AllocListElem_t *allocList,
94 1.13 oster RF_IoType_t type)
95 1.1 oster {
96 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
97 1.14 oster RF_IO_TYPE_WRITE);
98 1.1 oster }
99 1.1 oster
100 1.3 oster void
101 1.13 oster rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
102 1.13 oster RF_DagHeader_t *dag_h, void *bp,
103 1.13 oster RF_RaidAccessFlags_t flags,
104 1.13 oster RF_AllocListElem_t *allocList)
105 1.1 oster {
106 1.3 oster /* "normal" rollaway */
107 1.14 oster rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
108 1.14 oster allocList, &rf_xorFuncs, NULL);
109 1.1 oster }
110 1.1 oster
111 1.3 oster void
112 1.13 oster rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
113 1.13 oster RF_DagHeader_t *dag_h, void *bp,
114 1.13 oster RF_RaidAccessFlags_t flags,
115 1.13 oster RF_AllocListElem_t *allocList)
116 1.1 oster {
117 1.3 oster /* "normal" rollaway */
118 1.14 oster rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
119 1.14 oster allocList, 1, rf_RegularXorFunc, RF_TRUE);
120 1.1 oster }
121 1.1 oster
122 1.1 oster
123 1.1 oster /******************************************************************************
124 1.1 oster *
125 1.1 oster * DAG creation code begins here
126 1.1 oster */
127 1.1 oster
128 1.1 oster
129 1.1 oster /******************************************************************************
130 1.1 oster *
131 1.1 oster * creates a DAG to perform a large-write operation:
132 1.1 oster *
133 1.1 oster * / Rod \ / Wnd \
134 1.1 oster * H -- block- Rod - Xor - Cmt - Wnd --- T
135 1.1 oster * \ Rod / \ Wnp /
136 1.1 oster * \[Wnq]/
137 1.1 oster *
138 1.1 oster * The XOR node also does the Q calculation in the P+Q architecture.
139 1.1 oster * All nodes are before the commit node (Cmt) are assumed to be atomic and
140 1.1 oster * undoable - or - they make no changes to permanent state.
141 1.1 oster *
142 1.1 oster * Rod = read old data
143 1.1 oster * Cmt = commit node
144 1.1 oster * Wnp = write new parity
145 1.1 oster * Wnd = write new data
146 1.1 oster * Wnq = write new "q"
147 1.1 oster * [] denotes optional segments in the graph
148 1.1 oster *
149 1.1 oster * Parameters: raidPtr - description of the physical array
150 1.1 oster * asmap - logical & physical addresses for this access
151 1.1 oster * bp - buffer ptr (holds write data)
152 1.3 oster * flags - general flags (e.g. disk locking)
153 1.1 oster * allocList - list of memory allocated in DAG creation
154 1.1 oster * nfaults - number of faults array can tolerate
155 1.1 oster * (equal to # redundancy units in stripe)
156 1.1 oster * redfuncs - list of redundancy generating functions
157 1.1 oster *
158 1.1 oster *****************************************************************************/
159 1.1 oster
160 1.3 oster void
161 1.13 oster rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
162 1.13 oster RF_DagHeader_t *dag_h, void *bp,
163 1.13 oster RF_RaidAccessFlags_t flags,
164 1.13 oster RF_AllocListElem_t *allocList,
165 1.13 oster int nfaults, int (*redFunc) (RF_DagNode_t *),
166 1.13 oster int allowBufferRecycle)
167 1.1 oster {
168 1.22 oster RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
169 1.3 oster RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
170 1.3 oster int nWndNodes, nRodNodes, i, nodeNum, asmNum;
171 1.3 oster RF_AccessStripeMapHeader_t *new_asm_h[2];
172 1.3 oster RF_StripeNum_t parityStripeID;
173 1.3 oster char *sosBuffer, *eosBuffer;
174 1.3 oster RF_ReconUnitNum_t which_ru;
175 1.3 oster RF_RaidLayout_t *layoutPtr;
176 1.3 oster RF_PhysDiskAddr_t *pda;
177 1.3 oster
178 1.3 oster layoutPtr = &(raidPtr->Layout);
179 1.14 oster parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
180 1.14 oster asmap->raidAddress,
181 1.14 oster &which_ru);
182 1.3 oster
183 1.19 oster #if RF_DEBUG_DAG
184 1.3 oster if (rf_dagDebug) {
185 1.3 oster printf("[Creating large-write DAG]\n");
186 1.3 oster }
187 1.19 oster #endif
188 1.3 oster dag_h->creator = "LargeWriteDAG";
189 1.3 oster
190 1.3 oster dag_h->numCommitNodes = 1;
191 1.3 oster dag_h->numCommits = 0;
192 1.3 oster dag_h->numSuccedents = 1;
193 1.3 oster
194 1.3 oster /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */
195 1.3 oster nWndNodes = asmap->numStripeUnitsAccessed;
196 1.22 oster
197 1.22 oster for (i = 0; i < nWndNodes; i++) {
198 1.22 oster tmpNode = rf_AllocDAGNode();
199 1.22 oster tmpNode->list_next = dag_h->nodes;
200 1.22 oster dag_h->nodes = tmpNode;
201 1.22 oster }
202 1.22 oster wndNodes = dag_h->nodes;
203 1.22 oster
204 1.22 oster xorNode = rf_AllocDAGNode();
205 1.22 oster xorNode->list_next = dag_h->nodes;
206 1.22 oster dag_h->nodes = xorNode;
207 1.22 oster
208 1.22 oster wnpNode = rf_AllocDAGNode();
209 1.22 oster wnpNode->list_next = dag_h->nodes;
210 1.22 oster dag_h->nodes = wnpNode;
211 1.22 oster
212 1.22 oster blockNode = rf_AllocDAGNode();
213 1.22 oster blockNode->list_next = dag_h->nodes;
214 1.22 oster dag_h->nodes = blockNode;
215 1.22 oster
216 1.22 oster commitNode = rf_AllocDAGNode();
217 1.22 oster commitNode->list_next = dag_h->nodes;
218 1.22 oster dag_h->nodes = commitNode;
219 1.22 oster
220 1.22 oster termNode = rf_AllocDAGNode();
221 1.22 oster termNode->list_next = dag_h->nodes;
222 1.22 oster dag_h->nodes = termNode;
223 1.22 oster
224 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
225 1.3 oster if (nfaults == 2) {
226 1.22 oster wnqNode = rf_AllocDAGNode();
227 1.3 oster } else {
228 1.20 oster #endif
229 1.3 oster wnqNode = NULL;
230 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
231 1.3 oster }
232 1.20 oster #endif
233 1.14 oster rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
234 1.14 oster new_asm_h, &nRodNodes, &sosBuffer,
235 1.14 oster &eosBuffer, allocList);
236 1.3 oster if (nRodNodes > 0) {
237 1.22 oster for (i = 0; i < nRodNodes; i++) {
238 1.22 oster tmpNode = rf_AllocDAGNode();
239 1.22 oster tmpNode->list_next = dag_h->nodes;
240 1.22 oster dag_h->nodes = tmpNode;
241 1.22 oster }
242 1.22 oster rodNodes = dag_h->nodes;
243 1.3 oster } else {
244 1.3 oster rodNodes = NULL;
245 1.3 oster }
246 1.3 oster
247 1.3 oster /* begin node initialization */
248 1.3 oster if (nRodNodes > 0) {
249 1.14 oster rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
250 1.14 oster rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
251 1.14 oster dag_h, "Nil", allocList);
252 1.3 oster } else {
253 1.14 oster rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
254 1.14 oster rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
255 1.14 oster dag_h, "Nil", allocList);
256 1.3 oster }
257 1.3 oster
258 1.14 oster rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
259 1.14 oster rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
260 1.14 oster dag_h, "Cmt", allocList);
261 1.14 oster rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
262 1.14 oster rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
263 1.14 oster dag_h, "Trm", allocList);
264 1.3 oster
265 1.3 oster /* initialize the Rod nodes */
266 1.22 oster tmpNode = rodNodes;
267 1.3 oster for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
268 1.3 oster if (new_asm_h[asmNum]) {
269 1.3 oster pda = new_asm_h[asmNum]->stripeMap->physInfo;
270 1.3 oster while (pda) {
271 1.22 oster rf_InitNode(tmpNode, rf_wait,
272 1.14 oster RF_FALSE, rf_DiskReadFunc,
273 1.14 oster rf_DiskReadUndoFunc,
274 1.14 oster rf_GenericWakeupFunc,
275 1.14 oster 1, 1, 4, 0, dag_h,
276 1.14 oster "Rod", allocList);
277 1.22 oster tmpNode->params[0].p = pda;
278 1.22 oster tmpNode->params[1].p = pda->bufPtr;
279 1.22 oster tmpNode->params[2].v = parityStripeID;
280 1.22 oster tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
281 1.17 oster which_ru);
282 1.3 oster nodeNum++;
283 1.3 oster pda = pda->next;
284 1.22 oster tmpNode = tmpNode->list_next;
285 1.3 oster }
286 1.3 oster }
287 1.3 oster }
288 1.3 oster RF_ASSERT(nodeNum == nRodNodes);
289 1.3 oster
290 1.3 oster /* initialize the wnd nodes */
291 1.3 oster pda = asmap->physInfo;
292 1.22 oster tmpNode = wndNodes;
293 1.3 oster for (i = 0; i < nWndNodes; i++) {
294 1.22 oster rf_InitNode(tmpNode, rf_wait, RF_FALSE,
295 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
296 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0,
297 1.14 oster dag_h, "Wnd", allocList);
298 1.3 oster RF_ASSERT(pda != NULL);
299 1.22 oster tmpNode->params[0].p = pda;
300 1.22 oster tmpNode->params[1].p = pda->bufPtr;
301 1.22 oster tmpNode->params[2].v = parityStripeID;
302 1.22 oster tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
303 1.3 oster pda = pda->next;
304 1.22 oster tmpNode = tmpNode->list_next;
305 1.3 oster }
306 1.3 oster
307 1.3 oster /* initialize the redundancy node */
308 1.3 oster if (nRodNodes > 0) {
309 1.14 oster rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
310 1.14 oster rf_NullNodeUndoFunc, NULL, 1,
311 1.14 oster nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
312 1.14 oster nfaults, dag_h, "Xr ", allocList);
313 1.3 oster } else {
314 1.14 oster rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
315 1.14 oster rf_NullNodeUndoFunc, NULL, 1,
316 1.14 oster 1, 2 * (nWndNodes + nRodNodes) + 1,
317 1.14 oster nfaults, dag_h, "Xr ", allocList);
318 1.3 oster }
319 1.3 oster xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
320 1.22 oster tmpNode = wndNodes;
321 1.3 oster for (i = 0; i < nWndNodes; i++) {
322 1.14 oster /* pda */
323 1.22 oster xorNode->params[2 * i + 0] = tmpNode->params[0];
324 1.14 oster /* buf ptr */
325 1.22 oster xorNode->params[2 * i + 1] = tmpNode->params[1];
326 1.22 oster tmpNode = tmpNode->list_next;
327 1.3 oster }
328 1.22 oster tmpNode = rodNodes;
329 1.3 oster for (i = 0; i < nRodNodes; i++) {
330 1.14 oster /* pda */
331 1.22 oster xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
332 1.14 oster /* buf ptr */
333 1.22 oster xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
334 1.22 oster tmpNode = tmpNode->list_next;
335 1.3 oster }
336 1.3 oster /* xor node needs to get at RAID information */
337 1.3 oster xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
338 1.3 oster
339 1.3 oster /*
340 1.14 oster * Look for an Rod node that reads a complete SU. If none,
341 1.14 oster * alloc a buffer to receive the parity info. Note that we
342 1.14 oster * can't use a new data buffer because it will not have gotten
343 1.14 oster * written when the xor occurs. */
344 1.3 oster if (allowBufferRecycle) {
345 1.22 oster tmpNode = rodNodes;
346 1.3 oster for (i = 0; i < nRodNodes; i++) {
347 1.22 oster if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
348 1.3 oster break;
349 1.22 oster tmpNode = tmpNode->list_next;
350 1.3 oster }
351 1.3 oster }
352 1.3 oster if ((!allowBufferRecycle) || (i == nRodNodes)) {
353 1.12 oster RF_MallocAndAdd(xorNode->results[0],
354 1.12 oster rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
355 1.12 oster (void *), allocList);
356 1.3 oster } else {
357 1.22 oster /* this works because the only way we get here is if
358 1.22 oster allowBufferRecycle is true and we went through the
359 1.22 oster above for loop, and exited via the break before
360 1.22 oster i==nRodNodes was true. That means tmpNode will
361 1.22 oster still point to a valid node -- the one we want for
362 1.22 oster here! */
363 1.22 oster xorNode->results[0] = tmpNode->params[1].p;
364 1.3 oster }
365 1.3 oster
366 1.3 oster /* initialize the Wnp node */
367 1.14 oster rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
368 1.14 oster rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
369 1.14 oster dag_h, "Wnp", allocList);
370 1.3 oster wnpNode->params[0].p = asmap->parityInfo;
371 1.3 oster wnpNode->params[1].p = xorNode->results[0];
372 1.3 oster wnpNode->params[2].v = parityStripeID;
373 1.17 oster wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
374 1.3 oster /* parityInfo must describe entire parity unit */
375 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
376 1.3 oster
377 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
378 1.3 oster if (nfaults == 2) {
379 1.3 oster /*
380 1.3 oster * We never try to recycle a buffer for the Q calcuation
381 1.3 oster * in addition to the parity. This would cause two buffers
382 1.3 oster * to get smashed during the P and Q calculation, guaranteeing
383 1.3 oster * one would be wrong.
384 1.3 oster */
385 1.12 oster RF_MallocAndAdd(xorNode->results[1],
386 1.12 oster rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
387 1.12 oster (void *), allocList);
388 1.14 oster rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
389 1.14 oster rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
390 1.14 oster 1, 1, 4, 0, dag_h, "Wnq", allocList);
391 1.3 oster wnqNode->params[0].p = asmap->qInfo;
392 1.3 oster wnqNode->params[1].p = xorNode->results[1];
393 1.3 oster wnqNode->params[2].v = parityStripeID;
394 1.17 oster wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
395 1.3 oster /* parityInfo must describe entire parity unit */
396 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
397 1.3 oster }
398 1.20 oster #endif
399 1.3 oster /*
400 1.3 oster * Connect nodes to form graph.
401 1.3 oster */
402 1.3 oster
403 1.3 oster /* connect dag header to block node */
404 1.3 oster RF_ASSERT(blockNode->numAntecedents == 0);
405 1.3 oster dag_h->succedents[0] = blockNode;
406 1.3 oster
407 1.3 oster if (nRodNodes > 0) {
408 1.3 oster /* connect the block node to the Rod nodes */
409 1.3 oster RF_ASSERT(blockNode->numSuccedents == nRodNodes);
410 1.3 oster RF_ASSERT(xorNode->numAntecedents == nRodNodes);
411 1.22 oster tmpNode = rodNodes;
412 1.3 oster for (i = 0; i < nRodNodes; i++) {
413 1.22 oster RF_ASSERT(tmpNode.numAntecedents == 1);
414 1.22 oster blockNode->succedents[i] = tmpNode;
415 1.22 oster tmpNode->antecedents[0] = blockNode;
416 1.22 oster tmpNode->antType[0] = rf_control;
417 1.3 oster
418 1.3 oster /* connect the Rod nodes to the Xor node */
419 1.22 oster RF_ASSERT(tmpNode.numSuccedents == 1);
420 1.22 oster tmpNode->succedents[0] = xorNode;
421 1.22 oster xorNode->antecedents[i] = tmpNode;
422 1.3 oster xorNode->antType[i] = rf_trueData;
423 1.22 oster tmpNode = tmpNode->list_next;
424 1.3 oster }
425 1.3 oster } else {
426 1.3 oster /* connect the block node to the Xor node */
427 1.3 oster RF_ASSERT(blockNode->numSuccedents == 1);
428 1.3 oster RF_ASSERT(xorNode->numAntecedents == 1);
429 1.3 oster blockNode->succedents[0] = xorNode;
430 1.3 oster xorNode->antecedents[0] = blockNode;
431 1.3 oster xorNode->antType[0] = rf_control;
432 1.3 oster }
433 1.3 oster
434 1.3 oster /* connect the xor node to the commit node */
435 1.3 oster RF_ASSERT(xorNode->numSuccedents == 1);
436 1.3 oster RF_ASSERT(commitNode->numAntecedents == 1);
437 1.3 oster xorNode->succedents[0] = commitNode;
438 1.3 oster commitNode->antecedents[0] = xorNode;
439 1.3 oster commitNode->antType[0] = rf_control;
440 1.3 oster
441 1.3 oster /* connect the commit node to the write nodes */
442 1.3 oster RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
443 1.22 oster tmpNode = wndNodes;
444 1.3 oster for (i = 0; i < nWndNodes; i++) {
445 1.3 oster RF_ASSERT(wndNodes->numAntecedents == 1);
446 1.22 oster commitNode->succedents[i] = tmpNode;
447 1.22 oster tmpNode->antecedents[0] = commitNode;
448 1.22 oster tmpNode->antType[0] = rf_control;
449 1.22 oster tmpNode = tmpNode->list_next;
450 1.3 oster }
451 1.3 oster RF_ASSERT(wnpNode->numAntecedents == 1);
452 1.3 oster commitNode->succedents[nWndNodes] = wnpNode;
453 1.3 oster wnpNode->antecedents[0] = commitNode;
454 1.3 oster wnpNode->antType[0] = rf_trueData;
455 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
456 1.3 oster if (nfaults == 2) {
457 1.3 oster RF_ASSERT(wnqNode->numAntecedents == 1);
458 1.3 oster commitNode->succedents[nWndNodes + 1] = wnqNode;
459 1.3 oster wnqNode->antecedents[0] = commitNode;
460 1.3 oster wnqNode->antType[0] = rf_trueData;
461 1.3 oster }
462 1.20 oster #endif
463 1.3 oster /* connect the write nodes to the term node */
464 1.3 oster RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
465 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
466 1.22 oster tmpNode = wndNodes;
467 1.3 oster for (i = 0; i < nWndNodes; i++) {
468 1.3 oster RF_ASSERT(wndNodes->numSuccedents == 1);
469 1.22 oster tmpNode->succedents[0] = termNode;
470 1.22 oster termNode->antecedents[i] = tmpNode;
471 1.3 oster termNode->antType[i] = rf_control;
472 1.22 oster tmpNode = tmpNode->list_next;
473 1.3 oster }
474 1.3 oster RF_ASSERT(wnpNode->numSuccedents == 1);
475 1.3 oster wnpNode->succedents[0] = termNode;
476 1.3 oster termNode->antecedents[nWndNodes] = wnpNode;
477 1.3 oster termNode->antType[nWndNodes] = rf_control;
478 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
479 1.3 oster if (nfaults == 2) {
480 1.3 oster RF_ASSERT(wnqNode->numSuccedents == 1);
481 1.3 oster wnqNode->succedents[0] = termNode;
482 1.3 oster termNode->antecedents[nWndNodes + 1] = wnqNode;
483 1.3 oster termNode->antType[nWndNodes + 1] = rf_control;
484 1.3 oster }
485 1.20 oster #endif
486 1.1 oster }
487 1.1 oster /******************************************************************************
488 1.1 oster *
489 1.1 oster * creates a DAG to perform a small-write operation (either raid 5 or pq),
490 1.1 oster * which is as follows:
491 1.1 oster *
492 1.1 oster * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
493 1.1 oster *           \- Rod X      /     \----> Wnd [Und]-/
494 1.1 oster *          [\- Rod X     /       \---> Wnd [Und]-/]
495 1.1 oster *          [\- Roq -> Q /         \--> Wnq [Unq]-/]
496 1.1 oster *
497 1.1 oster * Rop = read old parity
498 1.1 oster * Rod = read old data
499 1.1 oster * Roq = read old "q"
500 1.1 oster * Cmt = commit node
501 1.1 oster * Und = unlock data disk
502 1.1 oster * Unp = unlock parity disk
503 1.1 oster * Unq = unlock q disk
504 1.1 oster * Wnp = write new parity
505 1.1 oster * Wnd = write new data
506 1.1 oster * Wnq = write new "q"
507 1.1 oster * [ ] denotes optional segments in the graph
508 1.1 oster *
509 1.1 oster * Parameters: raidPtr - description of the physical array
510 1.1 oster * asmap - logical & physical addresses for this access
511 1.1 oster * bp - buffer ptr (holds write data)
512 1.3 oster * flags - general flags (e.g. disk locking)
513 1.1 oster * allocList - list of memory allocated in DAG creation
514 1.1 oster * pfuncs - list of parity generating functions
515 1.1 oster * qfuncs - list of q generating functions
516 1.1 oster *
517 1.1 oster * A null qfuncs indicates single fault tolerant
518 1.1 oster *****************************************************************************/
519 1.1 oster
520 1.3 oster void
521 1.13 oster rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
522 1.13 oster RF_DagHeader_t *dag_h, void *bp,
523 1.13 oster RF_RaidAccessFlags_t flags,
524 1.13 oster RF_AllocListElem_t *allocList,
525 1.13 oster const RF_RedFuncs_t *pfuncs,
526 1.13 oster const RF_RedFuncs_t *qfuncs)
527 1.1 oster {
528 1.3 oster RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
529 1.22 oster RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
530 1.22 oster RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
531 1.3 oster RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
532 1.22 oster RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
533 1.22 oster RF_DagNode_t *tmpwriteParityNode;
534 1.22 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
535 1.22 oster RF_DagNode_t *tmpwriteQNode;
536 1.22 oster #endif
537 1.16 oster int i, j, nNodes, totalNumNodes;
538 1.3 oster RF_ReconUnitNum_t which_ru;
539 1.3 oster int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
540 1.3 oster int (*qfunc) (RF_DagNode_t *);
541 1.3 oster int numDataNodes, numParityNodes;
542 1.3 oster RF_StripeNum_t parityStripeID;
543 1.3 oster RF_PhysDiskAddr_t *pda;
544 1.3 oster char *name, *qname;
545 1.3 oster long nfaults;
546 1.3 oster
547 1.3 oster nfaults = qfuncs ? 2 : 1;
548 1.3 oster
549 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
550 1.3 oster asmap->raidAddress, &which_ru);
551 1.3 oster pda = asmap->physInfo;
552 1.3 oster numDataNodes = asmap->numStripeUnitsAccessed;
553 1.3 oster numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
554 1.3 oster
555 1.19 oster #if RF_DEBUG_DAG
556 1.3 oster if (rf_dagDebug) {
557 1.3 oster printf("[Creating small-write DAG]\n");
558 1.3 oster }
559 1.19 oster #endif
560 1.3 oster RF_ASSERT(numDataNodes > 0);
561 1.3 oster dag_h->creator = "SmallWriteDAG";
562 1.3 oster
563 1.3 oster dag_h->numCommitNodes = 1;
564 1.3 oster dag_h->numCommits = 0;
565 1.3 oster dag_h->numSuccedents = 1;
566 1.3 oster
567 1.3 oster /*
568 1.3 oster * DAG creation occurs in four steps:
569 1.3 oster * 1. count the number of nodes in the DAG
570 1.3 oster * 2. create the nodes
571 1.3 oster * 3. initialize the nodes
572 1.3 oster * 4. connect the nodes
573 1.3 oster */
574 1.3 oster
575 1.3 oster /*
576 1.3 oster * Step 1. compute number of nodes in the graph
577 1.3 oster */
578 1.3 oster
579 1.14 oster /* number of nodes: a read and write for each data unit a
580 1.14 oster * redundancy computation node for each parity node (nfaults *
581 1.14 oster * nparity) a read and write for each parity unit a block and
582 1.14 oster * commit node (2) a terminate node if atomic RMW an unlock
583 1.14 oster * node for each data unit, redundancy unit */
584 1.3 oster totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
585 1.3 oster + (nfaults * 2 * numParityNodes) + 3;
586 1.3 oster /*
587 1.3 oster * Step 2. create the nodes
588 1.3 oster */
589 1.22 oster
590 1.22 oster blockNode = rf_AllocDAGNode();
591 1.22 oster blockNode->list_next = dag_h->nodes;
592 1.22 oster dag_h->nodes = blockNode;
593 1.22 oster
594 1.22 oster commitNode = rf_AllocDAGNode();
595 1.22 oster commitNode->list_next = dag_h->nodes;
596 1.22 oster dag_h->nodes = commitNode;
597 1.22 oster
598 1.22 oster for (i = 0; i < numDataNodes; i++) {
599 1.22 oster tmpNode = rf_AllocDAGNode();
600 1.22 oster tmpNode->list_next = dag_h->nodes;
601 1.22 oster dag_h->nodes = tmpNode;
602 1.22 oster }
603 1.22 oster readDataNodes = dag_h->nodes;
604 1.22 oster
605 1.22 oster for (i = 0; i < numParityNodes; i++) {
606 1.22 oster tmpNode = rf_AllocDAGNode();
607 1.22 oster tmpNode->list_next = dag_h->nodes;
608 1.22 oster dag_h->nodes = tmpNode;
609 1.22 oster }
610 1.22 oster readParityNodes = dag_h->nodes;
611 1.22 oster
612 1.22 oster for (i = 0; i < numDataNodes; i++) {
613 1.22 oster tmpNode = rf_AllocDAGNode();
614 1.22 oster tmpNode->list_next = dag_h->nodes;
615 1.22 oster dag_h->nodes = tmpNode;
616 1.22 oster }
617 1.22 oster writeDataNodes = dag_h->nodes;
618 1.22 oster
619 1.22 oster for (i = 0; i < numParityNodes; i++) {
620 1.22 oster tmpNode = rf_AllocDAGNode();
621 1.22 oster tmpNode->list_next = dag_h->nodes;
622 1.22 oster dag_h->nodes = tmpNode;
623 1.22 oster }
624 1.22 oster writeParityNodes = dag_h->nodes;
625 1.22 oster
626 1.22 oster for (i = 0; i < numParityNodes; i++) {
627 1.22 oster tmpNode = rf_AllocDAGNode();
628 1.22 oster tmpNode->list_next = dag_h->nodes;
629 1.22 oster dag_h->nodes = tmpNode;
630 1.22 oster }
631 1.22 oster xorNodes = dag_h->nodes;
632 1.22 oster
633 1.22 oster termNode = rf_AllocDAGNode();
634 1.22 oster termNode->list_next = dag_h->nodes;
635 1.22 oster dag_h->nodes = termNode;
636 1.16 oster
637 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
638 1.3 oster if (nfaults == 2) {
639 1.22 oster for (i = 0; i < numParityNodes; i++) {
640 1.22 oster tmpNode = rf_AllocDAGNode();
641 1.22 oster tmpNode->list_next = dag_h->nodes;
642 1.22 oster dag_h->nodes = tmpNode;
643 1.22 oster }
644 1.22 oster readQNodes = dag_h->nodes;
645 1.22 oster
646 1.22 oster for (i = 0; i < numParityNodes; i++) {
647 1.22 oster tmpNode = rf_AllocDAGNode();
648 1.22 oster tmpNode->list_next = dag_h->nodes;
649 1.22 oster dag_h->nodes = tmpNode;
650 1.22 oster }
651 1.22 oster writeQNodes = dag_h->nodes;
652 1.22 oster
653 1.22 oster for (i = 0; i < numParityNodes; i++) {
654 1.22 oster tmpNode = rf_AllocDAGNode();
655 1.22 oster tmpNode->list_next = dag_h->nodes;
656 1.22 oster dag_h->nodes = tmpNode;
657 1.22 oster }
658 1.22 oster qNodes = dag_h->nodes;
659 1.3 oster } else {
660 1.20 oster #endif
661 1.18 oster readQNodes = writeQNodes = qNodes = NULL;
662 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
663 1.3 oster }
664 1.20 oster #endif
665 1.3 oster RF_ASSERT(i == totalNumNodes);
666 1.3 oster
667 1.3 oster /*
668 1.3 oster * Step 3. initialize the nodes
669 1.3 oster */
670 1.3 oster /* initialize block node (Nil) */
671 1.3 oster nNodes = numDataNodes + (nfaults * numParityNodes);
672 1.14 oster rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
673 1.14 oster rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
674 1.14 oster dag_h, "Nil", allocList);
675 1.3 oster
676 1.3 oster /* initialize commit node (Cmt) */
677 1.14 oster rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
678 1.14 oster rf_NullNodeUndoFunc, NULL, nNodes,
679 1.14 oster (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
680 1.3 oster
681 1.3 oster /* initialize terminate node (Trm) */
682 1.14 oster rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
683 1.14 oster rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
684 1.14 oster dag_h, "Trm", allocList);
685 1.3 oster
686 1.3 oster /* initialize nodes which read old data (Rod) */
687 1.22 oster tmpreadDataNode = readDataNodes;
688 1.3 oster for (i = 0; i < numDataNodes; i++) {
689 1.22 oster rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
690 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
691 1.14 oster rf_GenericWakeupFunc, (nfaults * numParityNodes),
692 1.14 oster 1, 4, 0, dag_h, "Rod", allocList);
693 1.3 oster RF_ASSERT(pda != NULL);
694 1.3 oster /* physical disk addr desc */
695 1.22 oster tmpreadDataNode->params[0].p = pda;
696 1.3 oster /* buffer to hold old data */
697 1.22 oster tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
698 1.22 oster tmpreadDataNode->params[2].v = parityStripeID;
699 1.22 oster tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
700 1.17 oster which_ru);
701 1.3 oster pda = pda->next;
702 1.22 oster for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
703 1.22 oster tmpreadDataNode->propList[j] = NULL;
704 1.3 oster }
705 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
706 1.3 oster }
707 1.3 oster
708 1.3 oster /* initialize nodes which read old parity (Rop) */
709 1.3 oster pda = asmap->parityInfo;
710 1.3 oster i = 0;
711 1.22 oster tmpreadParityNode = readParityNodes;
712 1.3 oster for (i = 0; i < numParityNodes; i++) {
713 1.3 oster RF_ASSERT(pda != NULL);
714 1.22 oster rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
715 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
716 1.14 oster rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
717 1.14 oster dag_h, "Rop", allocList);
718 1.22 oster tmpreadParityNode->params[0].p = pda;
719 1.3 oster /* buffer to hold old parity */
720 1.22 oster tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
721 1.22 oster tmpreadParityNode->params[2].v = parityStripeID;
722 1.22 oster tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
723 1.17 oster which_ru);
724 1.3 oster pda = pda->next;
725 1.22 oster for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
726 1.22 oster tmpreadParityNode->propList[0] = NULL;
727 1.3 oster }
728 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
729 1.3 oster }
730 1.3 oster
731 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
732 1.3 oster /* initialize nodes which read old Q (Roq) */
733 1.3 oster if (nfaults == 2) {
734 1.3 oster pda = asmap->qInfo;
735 1.22 oster tmpreadQNode = readQNodes;
736 1.3 oster for (i = 0; i < numParityNodes; i++) {
737 1.3 oster RF_ASSERT(pda != NULL);
738 1.22 oster rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
739 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
740 1.14 oster rf_GenericWakeupFunc, numParityNodes,
741 1.14 oster 1, 4, 0, dag_h, "Roq", allocList);
742 1.22 oster tmpreadQNode->params[0].p = pda;
743 1.3 oster /* buffer to hold old Q */
744 1.22 oster tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
745 1.22 oster tmpreadQNode->params[2].v = parityStripeID;
746 1.22 oster tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
747 1.17 oster which_ru);
748 1.3 oster pda = pda->next;
749 1.22 oster for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
750 1.22 oster tmpreadQNode->propList[0] = NULL;
751 1.3 oster }
752 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
753 1.3 oster }
754 1.3 oster }
755 1.20 oster #endif
756 1.3 oster /* initialize nodes which write new data (Wnd) */
757 1.3 oster pda = asmap->physInfo;
758 1.22 oster tmpwriteDataNode = writeDataNodes;
759 1.3 oster for (i = 0; i < numDataNodes; i++) {
760 1.3 oster RF_ASSERT(pda != NULL);
761 1.22 oster rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
762 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
763 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
764 1.14 oster "Wnd", allocList);
765 1.3 oster /* physical disk addr desc */
766 1.22 oster tmpwriteDataNode->params[0].p = pda;
767 1.3 oster /* buffer holding new data to be written */
768 1.22 oster tmpwriteDataNode->params[1].p = pda->bufPtr;
769 1.22 oster tmpwriteDataNode->params[2].v = parityStripeID;
770 1.22 oster tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
771 1.17 oster which_ru);
772 1.3 oster pda = pda->next;
773 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
774 1.3 oster }
775 1.3 oster
776 1.3 oster /*
777 1.3 oster * Initialize nodes which compute new parity and Q.
778 1.3 oster */
779 1.3 oster /*
780 1.3 oster * We use the simple XOR func in the double-XOR case, and when
781 1.14 oster * we're accessing only a portion of one stripe unit. The
782 1.14 oster * distinction between the two is that the regular XOR func
783 1.14 oster * assumes that the targbuf is a full SU in size, and examines
784 1.14 oster * the pda associated with the buffer to decide where within
785 1.14 oster * the buffer to XOR the data, whereas the simple XOR func
786 1.14 oster * just XORs the data into the start of the buffer. */
787 1.3 oster if ((numParityNodes == 2) || ((numDataNodes == 1)
788 1.14 oster && (asmap->totalSectorsAccessed <
789 1.14 oster raidPtr->Layout.sectorsPerStripeUnit))) {
790 1.3 oster func = pfuncs->simple;
791 1.3 oster undoFunc = rf_NullNodeUndoFunc;
792 1.3 oster name = pfuncs->SimpleName;
793 1.3 oster if (qfuncs) {
794 1.3 oster qfunc = qfuncs->simple;
795 1.3 oster qname = qfuncs->SimpleName;
796 1.3 oster } else {
797 1.3 oster qfunc = NULL;
798 1.3 oster qname = NULL;
799 1.3 oster }
800 1.3 oster } else {
801 1.3 oster func = pfuncs->regular;
802 1.3 oster undoFunc = rf_NullNodeUndoFunc;
803 1.3 oster name = pfuncs->RegularName;
804 1.3 oster if (qfuncs) {
805 1.3 oster qfunc = qfuncs->regular;
806 1.3 oster qname = qfuncs->RegularName;
807 1.3 oster } else {
808 1.3 oster qfunc = NULL;
809 1.3 oster qname = NULL;
810 1.3 oster }
811 1.3 oster }
812 1.3 oster /*
813 1.3 oster * Initialize the xor nodes: params are {pda,buf}
814 1.3 oster * from {Rod,Wnd,Rop} nodes, and raidPtr
815 1.3 oster */
816 1.3 oster if (numParityNodes == 2) {
817 1.3 oster /* double-xor case */
818 1.22 oster tmpxorNode = xorNodes;
819 1.22 oster tmpreadDataNode = readDataNodes;
820 1.22 oster tmpreadParityNode = readParityNodes;
821 1.22 oster tmpwriteDataNode = writeDataNodes;
822 1.22 oster tmpqNode = qNodes;
823 1.22 oster tmpreadQNode = readQNodes;
824 1.3 oster for (i = 0; i < numParityNodes; i++) {
825 1.3 oster /* note: no wakeup func for xor */
826 1.22 oster rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
827 1.14 oster undoFunc, NULL, 1,
828 1.14 oster (numDataNodes + numParityNodes),
829 1.14 oster 7, 1, dag_h, name, allocList);
830 1.22 oster tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
831 1.22 oster tmpxorNode->params[0] = tmpreadDataNode->params[0];
832 1.22 oster tmpxorNode->params[1] = tmpreadDataNode->params[1];
833 1.22 oster tmpxorNode->params[2] = tmpreadParityNode->params[0];
834 1.22 oster tmpxorNode->params[3] = tmpreadParityNode->params[1];
835 1.22 oster tmpxorNode->params[4] = tmpwriteDataNode->params[0];
836 1.22 oster tmpxorNode->params[5] = tmpwriteDataNode->params[1];
837 1.22 oster tmpxorNode->params[6].p = raidPtr;
838 1.3 oster /* use old parity buf as target buf */
839 1.22 oster tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
840 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
841 1.3 oster if (nfaults == 2) {
842 1.3 oster /* note: no wakeup func for qor */
843 1.22 oster rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
844 1.14 oster qfunc, undoFunc, NULL, 1,
845 1.14 oster (numDataNodes + numParityNodes),
846 1.14 oster 7, 1, dag_h, qname, allocList);
847 1.22 oster tmpqNode->params[0] = tmpreadDataNode->params[0];
848 1.22 oster tmpqNode->params[1] = tmpreadDataNode->params[1];
849 1.22 oster tmpqNode->params[2] = tmpreadQNode->.params[0];
850 1.22 oster tmpqNode->params[3] = tmpreadQNode->params[1];
851 1.22 oster tmpqNode->params[4] = tmpwriteDataNode->params[0];
852 1.22 oster tmpqNode->params[5] = tmpwriteDataNode->params[1];
853 1.22 oster tmpqNode->params[6].p = raidPtr;
854 1.3 oster /* use old Q buf as target buf */
855 1.22 oster tmpqNode->results[0] = tmpreadQNode->params[1].p;
856 1.22 oster tmpqNode = tmpqNode->list_next;
857 1.22 oster tmpreadQNodes = tmpreadQNodes->list_next;
858 1.3 oster }
859 1.20 oster #endif
860 1.22 oster tmpxorNode = tmpxorNode->list_next;
861 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
862 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
863 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
864 1.3 oster }
865 1.3 oster } else {
866 1.3 oster /* there is only one xor node in this case */
867 1.22 oster rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
868 1.14 oster undoFunc, NULL, 1, (numDataNodes + numParityNodes),
869 1.14 oster (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
870 1.14 oster dag_h, name, allocList);
871 1.22 oster xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
872 1.22 oster tmpreadDataNode = readDataNodes;
873 1.22 oster for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
874 1.22 oster out the "+1" into the "deal with Rop separately below */
875 1.22 oster /* set up params related to Rod nodes */
876 1.22 oster xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
877 1.22 oster xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
878 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
879 1.22 oster }
880 1.22 oster /* deal with Rop separately */
881 1.22 oster xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0]; /* pda */
882 1.22 oster xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1]; /* buffer ptr */
883 1.22 oster
884 1.22 oster tmpwriteDataNode = writeDataNodes;
885 1.3 oster for (i = 0; i < numDataNodes; i++) {
886 1.3 oster /* set up params related to Wnd and Wnp nodes */
887 1.22 oster xorNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
888 1.22 oster tmpwriteDataNode->params[0];
889 1.22 oster xorNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
890 1.22 oster tmpwriteDataNode->params[1];
891 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
892 1.3 oster }
893 1.3 oster /* xor node needs to get at RAID information */
894 1.22 oster xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
895 1.22 oster xorNodes->results[0] = readParityNodes->params[1].p;
896 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
897 1.3 oster if (nfaults == 2) {
898 1.22 oster rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
899 1.14 oster undoFunc, NULL, 1,
900 1.14 oster (numDataNodes + numParityNodes),
901 1.14 oster (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
902 1.14 oster dag_h, qname, allocList);
903 1.22 oster tmpreadDataNode = readDataNodes;
904 1.3 oster for (i = 0; i < numDataNodes; i++) {
905 1.3 oster /* set up params related to Rod */
906 1.22 oster qNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
907 1.22 oster qNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
908 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
909 1.3 oster }
910 1.3 oster /* and read old q */
911 1.22 oster qNodes->params[2 * numDataNodes + 0] = /* pda */
912 1.22 oster readQNodes->params[0];
913 1.22 oster qNodes->params[2 * numDataNodes + 1] = /* buffer ptr */
914 1.22 oster readQNodes->params[1];
915 1.22 oster tmpwriteDataNode = writeDataNodes;
916 1.3 oster for (i = 0; i < numDataNodes; i++) {
917 1.3 oster /* set up params related to Wnd nodes */
918 1.22 oster qNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
919 1.22 oster tmpwriteDataNode->params[0];
920 1.22 oster qNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
921 1.22 oster tmpwriteDataNode->params[1];
922 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
923 1.3 oster }
924 1.3 oster /* xor node needs to get at RAID information */
925 1.22 oster qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
926 1.22 oster qNodes->results[0] = readQNodes->params[1].p;
927 1.3 oster }
928 1.20 oster #endif
929 1.3 oster }
930 1.3 oster
931 1.3 oster /* initialize nodes which write new parity (Wnp) */
932 1.3 oster pda = asmap->parityInfo;
933 1.22 oster tmpwriteParityNode = writeParityNodes;
934 1.22 oster tmpxorNode = xorNodes;
935 1.3 oster for (i = 0; i < numParityNodes; i++) {
936 1.22 oster rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
937 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
938 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
939 1.14 oster "Wnp", allocList);
940 1.3 oster RF_ASSERT(pda != NULL);
941 1.22 oster tmpwriteParityNode->params[0].p = pda; /* param 1 (bufPtr)
942 1.22 oster * filled in by xor node */
943 1.22 oster tmpwriteParityNode->params[1].p = tmpxorNode->results[0]; /* buffer pointer for
944 1.22 oster * parity write
945 1.22 oster * operation */
946 1.22 oster tmpwriteParityNode->params[2].v = parityStripeID;
947 1.22 oster tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
948 1.17 oster which_ru);
949 1.3 oster pda = pda->next;
950 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
951 1.22 oster tmpxorNode = tmpxorNode->list_next;
952 1.3 oster }
953 1.3 oster
954 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
955 1.3 oster /* initialize nodes which write new Q (Wnq) */
956 1.3 oster if (nfaults == 2) {
957 1.3 oster pda = asmap->qInfo;
958 1.22 oster tmpwriteQNode = writeQNodes;
959 1.22 oster tmpqNode = qNodes;
960 1.3 oster for (i = 0; i < numParityNodes; i++) {
961 1.22 oster rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
962 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
963 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
964 1.14 oster "Wnq", allocList);
965 1.3 oster RF_ASSERT(pda != NULL);
966 1.22 oster tmpwriteQNode->params[0].p = pda; /* param 1 (bufPtr)
967 1.3 oster * filled in by xor node */
968 1.22 oster tmpwriteQNode->params[1].p = tmpqNode->results[0]; /* buffer pointer for
969 1.3 oster * parity write
970 1.3 oster * operation */
971 1.22 oster tmpwriteQNode->params[2].v = parityStripeID;
972 1.22 oster tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
973 1.17 oster which_ru);
974 1.3 oster pda = pda->next;
975 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
976 1.22 oster tmpqNode = tmpqNode->list_next;
977 1.3 oster }
978 1.3 oster }
979 1.20 oster #endif
980 1.3 oster /*
981 1.3 oster * Step 4. connect the nodes.
982 1.3 oster */
983 1.3 oster
984 1.3 oster /* connect header to block node */
985 1.3 oster dag_h->succedents[0] = blockNode;
986 1.3 oster
987 1.3 oster /* connect block node to read old data nodes */
988 1.3 oster RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
989 1.22 oster tmpreadDataNode = readDataNodes;
990 1.3 oster for (i = 0; i < numDataNodes; i++) {
991 1.22 oster blockNode->succedents[i] = tmpreadDataNode;
992 1.22 oster RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
993 1.22 oster tmpreadDataNode->antecedents[0] = blockNode;
994 1.22 oster tmpreadDataNode->antType[0] = rf_control;
995 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
996 1.3 oster }
997 1.3 oster
998 1.3 oster /* connect block node to read old parity nodes */
999 1.22 oster tmpreadParityNode = readParityNodes;
1000 1.3 oster for (i = 0; i < numParityNodes; i++) {
1001 1.22 oster blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1002 1.22 oster RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1003 1.22 oster tmpreadParityNode->antecedents[0] = blockNode;
1004 1.22 oster tmpreadParityNode->antType[0] = rf_control;
1005 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1006 1.3 oster }
1007 1.3 oster
1008 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1009 1.3 oster /* connect block node to read old Q nodes */
1010 1.3 oster if (nfaults == 2) {
1011 1.22 oster tmpreadQNode = readQNodes;
1012 1.3 oster for (i = 0; i < numParityNodes; i++) {
1013 1.22 oster blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1014 1.22 oster RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1015 1.22 oster tmpreadQNode->antecedents[0] = blockNode;
1016 1.22 oster tmpreadQNode->antType[0] = rf_control;
1017 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
1018 1.3 oster }
1019 1.3 oster }
1020 1.20 oster #endif
1021 1.3 oster /* connect read old data nodes to xor nodes */
1022 1.22 oster tmpreadDataNode = readDataNodes;
1023 1.3 oster for (i = 0; i < numDataNodes; i++) {
1024 1.22 oster RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1025 1.22 oster tmpxorNode = xorNodes;
1026 1.3 oster for (j = 0; j < numParityNodes; j++) {
1027 1.22 oster RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1028 1.22 oster tmpreadDataNode->succedents[j] = tmpxorNode;
1029 1.22 oster tmpxorNode->antecedents[i] = tmpreadDataNode;
1030 1.22 oster tmpxorNode->antType[i] = rf_trueData;
1031 1.22 oster tmpxorNode = tmpxorNode->list_next;
1032 1.3 oster }
1033 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1034 1.3 oster }
1035 1.3 oster
1036 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1037 1.3 oster /* connect read old data nodes to q nodes */
1038 1.3 oster if (nfaults == 2) {
1039 1.22 oster tmpreadDataNode = readDataNodes;
1040 1.3 oster for (i = 0; i < numDataNodes; i++) {
1041 1.22 oster tmpqNode = qNodes;
1042 1.3 oster for (j = 0; j < numParityNodes; j++) {
1043 1.22 oster RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1044 1.22 oster tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1045 1.22 oster tmpqNode->antecedents[i] = tmpreadDataNode;
1046 1.22 oster tmpqNode->antType[i] = rf_trueData;
1047 1.22 oster tmpqNode = tmpqNode->list_next;
1048 1.3 oster }
1049 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1050 1.3 oster }
1051 1.3 oster }
1052 1.20 oster #endif
1053 1.3 oster /* connect read old parity nodes to xor nodes */
1054 1.22 oster tmpreadParityNode = readParityNodes;
1055 1.3 oster for (i = 0; i < numParityNodes; i++) {
1056 1.22 oster RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1057 1.22 oster tmpxorNode = xorNodes;
1058 1.3 oster for (j = 0; j < numParityNodes; j++) {
1059 1.22 oster tmpreadParityNode->succedents[j] = tmpxorNode;
1060 1.22 oster tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1061 1.22 oster tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1062 1.22 oster tmpxorNode = tmpxorNode->list_next;
1063 1.3 oster }
1064 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1065 1.3 oster }
1066 1.3 oster
1067 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1068 1.3 oster /* connect read old q nodes to q nodes */
1069 1.3 oster if (nfaults == 2) {
1070 1.22 oster tmpreadParityNode = readParityNodes;
1071 1.22 oster tmpreadQNode = readQNodes;
1072 1.3 oster for (i = 0; i < numParityNodes; i++) {
1073 1.22 oster RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1074 1.22 oster tmpqNode = qNodes;
1075 1.3 oster for (j = 0; j < numParityNodes; j++) {
1076 1.22 oster tmpreadQNode->succedents[j] = tmpqNode;
1077 1.22 oster tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1078 1.22 oster tmpqNode->antType[numDataNodes + i] = rf_trueData;
1079 1.22 oster tmpqNode = tmpqNode->list_next;
1080 1.3 oster }
1081 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1082 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
1083 1.3 oster }
1084 1.3 oster }
1085 1.20 oster #endif
1086 1.3 oster /* connect xor nodes to commit node */
1087 1.3 oster RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1088 1.22 oster tmpxorNode = xorNodes;
1089 1.3 oster for (i = 0; i < numParityNodes; i++) {
1090 1.22 oster RF_ASSERT(tmpxorNode->numSuccedents == 1);
1091 1.22 oster tmpxorNode->succedents[0] = commitNode;
1092 1.22 oster commitNode->antecedents[i] = tmpxorNode;
1093 1.3 oster commitNode->antType[i] = rf_control;
1094 1.22 oster tmpxorNode = tmpxorNode->list_next;
1095 1.3 oster }
1096 1.3 oster
1097 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1098 1.3 oster /* connect q nodes to commit node */
1099 1.3 oster if (nfaults == 2) {
1100 1.22 oster tmpqNode = qNodes;
1101 1.3 oster for (i = 0; i < numParityNodes; i++) {
1102 1.22 oster RF_ASSERT(tmpqNode->numSuccedents == 1);
1103 1.22 oster tmpqNode->succedents[0] = commitNode;
1104 1.22 oster commitNode->antecedents[i + numParityNodes] = tmpqNode;
1105 1.3 oster commitNode->antType[i + numParityNodes] = rf_control;
1106 1.22 oster tmpqNode = tmpqNode->list_next;
1107 1.3 oster }
1108 1.3 oster }
1109 1.20 oster #endif
1110 1.3 oster /* connect commit node to write nodes */
1111 1.3 oster RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1112 1.22 oster tmpwriteDataNode = writeDataNodes;
1113 1.3 oster for (i = 0; i < numDataNodes; i++) {
1114 1.22 oster RF_ASSERT(tmpwriteDataNodes->numAntecedents == 1);
1115 1.22 oster commitNode->succedents[i] = tmpwriteDataNode;
1116 1.22 oster tmpwriteDataNode->antecedents[0] = commitNode;
1117 1.22 oster tmpwriteDataNode->antType[0] = rf_trueData;
1118 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
1119 1.3 oster }
1120 1.22 oster tmpwriteParityNode = writeParityNodes;
1121 1.3 oster for (i = 0; i < numParityNodes; i++) {
1122 1.22 oster RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1123 1.22 oster commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1124 1.22 oster tmpwriteParityNode->antecedents[0] = commitNode;
1125 1.22 oster tmpwriteParityNode->antType[0] = rf_trueData;
1126 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
1127 1.3 oster }
1128 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1129 1.3 oster if (nfaults == 2) {
1130 1.22 oster tmpwriteQNode = writeQNodes;
1131 1.3 oster for (i = 0; i < numParityNodes; i++) {
1132 1.22 oster RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1133 1.22 oster commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1134 1.22 oster tmpwriteQNode->antecedents[0] = commitNode;
1135 1.22 oster tmpwriteQNode->antType[0] = rf_trueData;
1136 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
1137 1.3 oster }
1138 1.3 oster }
1139 1.20 oster #endif
1140 1.3 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1141 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1142 1.22 oster tmpwriteDataNode = writeDataNodes;
1143 1.3 oster for (i = 0; i < numDataNodes; i++) {
1144 1.16 oster /* connect write new data nodes to term node */
1145 1.22 oster RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1146 1.16 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1147 1.22 oster tmpwriteDataNode->succedents[0] = termNode;
1148 1.22 oster termNode->antecedents[i] = tmpwriteDataNode;
1149 1.16 oster termNode->antType[i] = rf_control;
1150 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
1151 1.3 oster }
1152 1.3 oster
1153 1.22 oster tmpwriteParityNode = writeParityNodes;
1154 1.3 oster for (i = 0; i < numParityNodes; i++) {
1155 1.22 oster RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1156 1.22 oster tmpwriteParityNode->succedents[0] = termNode;
1157 1.22 oster termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1158 1.16 oster termNode->antType[numDataNodes + i] = rf_control;
1159 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
1160 1.3 oster }
1161 1.3 oster
1162 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1163 1.3 oster if (nfaults == 2) {
1164 1.22 oster tmpwriteQNode = writeQNodes;
1165 1.3 oster for (i = 0; i < numParityNodes; i++) {
1166 1.22 oster RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1167 1.22 oster tmpwriteQNode->succedents[0] = termNode;
1168 1.22 oster termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1169 1.16 oster termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1170 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
1171 1.3 oster }
1172 1.3 oster }
1173 1.20 oster #endif
1174 1.1 oster }
1175 1.1 oster
1176 1.1 oster
1177 1.1 oster /******************************************************************************
1178 1.1 oster * create a write graph (fault-free or degraded) for RAID level 1
1179 1.1 oster *
1180 1.1 oster * Hdr -> Commit -> Wpd -> Nil -> Trm
1181 1.1 oster * -> Wsd ->
1182 1.1 oster *
1183 1.1 oster * The "Wpd" node writes data to the primary copy in the mirror pair
1184 1.1 oster * The "Wsd" node writes data to the secondary copy in the mirror pair
1185 1.1 oster *
1186 1.1 oster * Parameters: raidPtr - description of the physical array
1187 1.1 oster * asmap - logical & physical addresses for this access
1188 1.1 oster * bp - buffer ptr (holds write data)
1189 1.3 oster * flags - general flags (e.g. disk locking)
1190 1.1 oster * allocList - list of memory allocated in DAG creation
1191 1.1 oster *****************************************************************************/
1192 1.1 oster
1193 1.3 oster void
1194 1.13 oster rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1195 1.13 oster RF_DagHeader_t *dag_h, void *bp,
1196 1.13 oster RF_RaidAccessFlags_t flags,
1197 1.13 oster RF_AllocListElem_t *allocList)
1198 1.1 oster {
1199 1.3 oster RF_DagNode_t *unblockNode, *termNode, *commitNode;
1200 1.22 oster RF_DagNode_t *wndNode, *wmirNode;
1201 1.22 oster RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1202 1.3 oster int nWndNodes, nWmirNodes, i;
1203 1.3 oster RF_ReconUnitNum_t which_ru;
1204 1.3 oster RF_PhysDiskAddr_t *pda, *pdaP;
1205 1.3 oster RF_StripeNum_t parityStripeID;
1206 1.3 oster
1207 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1208 1.3 oster asmap->raidAddress, &which_ru);
1209 1.19 oster #if RF_DEBUG_DAG
1210 1.3 oster if (rf_dagDebug) {
1211 1.3 oster printf("[Creating RAID level 1 write DAG]\n");
1212 1.3 oster }
1213 1.19 oster #endif
1214 1.3 oster dag_h->creator = "RaidOneWriteDAG";
1215 1.3 oster
1216 1.3 oster /* 2 implies access not SU aligned */
1217 1.3 oster nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1218 1.3 oster nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1219 1.3 oster
1220 1.3 oster /* alloc the Wnd nodes and the Wmir node */
1221 1.3 oster if (asmap->numDataFailed == 1)
1222 1.3 oster nWndNodes--;
1223 1.3 oster if (asmap->numParityFailed == 1)
1224 1.3 oster nWmirNodes--;
1225 1.3 oster
1226 1.3 oster /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1227 1.3 oster * + terminator) */
1228 1.22 oster for (i = 0; i < nWndNodes; i++) {
1229 1.22 oster tmpNode = rf_AllocDAGNode();
1230 1.22 oster tmpNode->list_next = dag_h->nodes;
1231 1.22 oster dag_h->nodes = tmpNode;
1232 1.22 oster }
1233 1.22 oster wndNode = dag_h->nodes;
1234 1.22 oster
1235 1.22 oster for (i = 0; i < nWmirNodes; i++) {
1236 1.22 oster tmpNode = rf_AllocDAGNode();
1237 1.22 oster tmpNode->list_next = dag_h->nodes;
1238 1.22 oster dag_h->nodes = tmpNode;
1239 1.22 oster }
1240 1.22 oster wmirNode = dag_h->nodes;
1241 1.22 oster
1242 1.22 oster commitNode = rf_AllocDAGNode();
1243 1.22 oster commitNode->list_next = dag_h->nodes;
1244 1.22 oster dag_h->nodes = commitNode;
1245 1.22 oster
1246 1.22 oster unblockNode = rf_AllocDAGNode();
1247 1.22 oster unblockNode->list_next = dag_h->nodes;
1248 1.22 oster dag_h->nodes = unblockNode;
1249 1.22 oster
1250 1.22 oster termNode = rf_AllocDAGNode();
1251 1.22 oster termNode->list_next = dag_h->nodes;
1252 1.22 oster dag_h->nodes = termNode;
1253 1.3 oster
1254 1.3 oster /* this dag can commit immediately */
1255 1.3 oster dag_h->numCommitNodes = 1;
1256 1.3 oster dag_h->numCommits = 0;
1257 1.3 oster dag_h->numSuccedents = 1;
1258 1.3 oster
1259 1.3 oster /* initialize the commit, unblock, and term nodes */
1260 1.14 oster rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1261 1.14 oster rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1262 1.14 oster 0, 0, 0, dag_h, "Cmt", allocList);
1263 1.14 oster rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1264 1.14 oster rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1265 1.14 oster 0, 0, dag_h, "Nil", allocList);
1266 1.14 oster rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1267 1.14 oster rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1268 1.14 oster dag_h, "Trm", allocList);
1269 1.3 oster
1270 1.3 oster /* initialize the wnd nodes */
1271 1.3 oster if (nWndNodes > 0) {
1272 1.3 oster pda = asmap->physInfo;
1273 1.22 oster tmpwndNode = wndNode;
1274 1.3 oster for (i = 0; i < nWndNodes; i++) {
1275 1.22 oster rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1276 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1277 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0,
1278 1.14 oster dag_h, "Wpd", allocList);
1279 1.3 oster RF_ASSERT(pda != NULL);
1280 1.22 oster tmpwndNode->params[0].p = pda;
1281 1.22 oster tmpwndNode->params[1].p = pda->bufPtr;
1282 1.22 oster tmpwndNode->params[2].v = parityStripeID;
1283 1.22 oster tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1284 1.3 oster pda = pda->next;
1285 1.22 oster tmpwndNode = tmpwndNode->list_next;
1286 1.3 oster }
1287 1.3 oster RF_ASSERT(pda == NULL);
1288 1.3 oster }
1289 1.3 oster /* initialize the mirror nodes */
1290 1.3 oster if (nWmirNodes > 0) {
1291 1.3 oster pda = asmap->physInfo;
1292 1.3 oster pdaP = asmap->parityInfo;
1293 1.22 oster tmpwmirNode = wmirNode;
1294 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1295 1.22 oster rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1296 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1297 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0,
1298 1.14 oster dag_h, "Wsd", allocList);
1299 1.3 oster RF_ASSERT(pda != NULL);
1300 1.22 oster tmpwmirNode->params[0].p = pdaP;
1301 1.22 oster tmpwmirNode->params[1].p = pda->bufPtr;
1302 1.22 oster tmpwmirNode->params[2].v = parityStripeID;
1303 1.22 oster tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1304 1.3 oster pda = pda->next;
1305 1.3 oster pdaP = pdaP->next;
1306 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1307 1.3 oster }
1308 1.3 oster RF_ASSERT(pda == NULL);
1309 1.3 oster RF_ASSERT(pdaP == NULL);
1310 1.3 oster }
1311 1.3 oster /* link the header node to the commit node */
1312 1.3 oster RF_ASSERT(dag_h->numSuccedents == 1);
1313 1.3 oster RF_ASSERT(commitNode->numAntecedents == 0);
1314 1.3 oster dag_h->succedents[0] = commitNode;
1315 1.3 oster
1316 1.3 oster /* link the commit node to the write nodes */
1317 1.3 oster RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1318 1.22 oster tmpwndNode = wndNode;
1319 1.3 oster for (i = 0; i < nWndNodes; i++) {
1320 1.22 oster RF_ASSERT(tmpwndNode->numAntecedents == 1);
1321 1.22 oster commitNode->succedents[i] = tmpwndNode;
1322 1.22 oster tmpwndNode->antecedents[0] = commitNode;
1323 1.22 oster tmpwndNode->antType[0] = rf_control;
1324 1.22 oster tmpwndNode = tmpwndNode->list_next;
1325 1.3 oster }
1326 1.22 oster tmpwmirNode = wmirNode;
1327 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1328 1.22 oster RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1329 1.22 oster commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1330 1.22 oster tmpwmirNode->antecedents[0] = commitNode;
1331 1.22 oster tmpwmirNode->antType[0] = rf_control;
1332 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1333 1.3 oster }
1334 1.3 oster
1335 1.3 oster /* link the write nodes to the unblock node */
1336 1.3 oster RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1337 1.22 oster tmpwndNode = wndNode;
1338 1.3 oster for (i = 0; i < nWndNodes; i++) {
1339 1.22 oster RF_ASSERT(tmpwndNode->numSuccedents == 1);
1340 1.22 oster tmpwndNode->succedents[0] = unblockNode;
1341 1.22 oster unblockNode->antecedents[i] = tmpwndNode;
1342 1.3 oster unblockNode->antType[i] = rf_control;
1343 1.22 oster tmpwndNode = tmpwndNode->list_next;
1344 1.3 oster }
1345 1.22 oster tmpwmirNode = wmirNode;
1346 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1347 1.22 oster RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1348 1.22 oster tmpwmirNode->succedents[0] = unblockNode;
1349 1.22 oster unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1350 1.3 oster unblockNode->antType[i + nWndNodes] = rf_control;
1351 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1352 1.3 oster }
1353 1.3 oster
1354 1.3 oster /* link the unblock node to the term node */
1355 1.3 oster RF_ASSERT(unblockNode->numSuccedents == 1);
1356 1.3 oster RF_ASSERT(termNode->numAntecedents == 1);
1357 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1358 1.3 oster unblockNode->succedents[0] = termNode;
1359 1.3 oster termNode->antecedents[0] = unblockNode;
1360 1.3 oster termNode->antType[0] = rf_control;
1361 1.1 oster }
1362