rf_dagffwr.c revision 1.33.104.1 1 1.33.104.1 rmind /* $NetBSD: rf_dagffwr.c,v 1.33.104.1 2014/05/18 17:45:46 rmind Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*
30 1.1 oster * rf_dagffwr.c
31 1.1 oster *
32 1.1 oster * code for creating fault-free DAGs
33 1.1 oster *
34 1.1 oster */
35 1.7 lukem
36 1.7 lukem #include <sys/cdefs.h>
37 1.33.104.1 rmind __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.33.104.1 2014/05/18 17:45:46 rmind Exp $");
38 1.1 oster
39 1.6 oster #include <dev/raidframe/raidframevar.h>
40 1.6 oster
41 1.1 oster #include "rf_raid.h"
42 1.1 oster #include "rf_dag.h"
43 1.1 oster #include "rf_dagutils.h"
44 1.1 oster #include "rf_dagfuncs.h"
45 1.1 oster #include "rf_debugMem.h"
46 1.1 oster #include "rf_dagffrd.h"
47 1.1 oster #include "rf_general.h"
48 1.1 oster #include "rf_dagffwr.h"
49 1.23 oster #include "rf_map.h"
50 1.1 oster
51 1.1 oster /******************************************************************************
52 1.1 oster *
53 1.1 oster * General comments on DAG creation:
54 1.3 oster *
55 1.1 oster * All DAGs in this file use roll-away error recovery. Each DAG has a single
56 1.1 oster * commit node, usually called "Cmt." If an error occurs before the Cmt node
57 1.1 oster * is reached, the execution engine will halt forward execution and work
58 1.1 oster * backward through the graph, executing the undo functions. Assuming that
59 1.1 oster * each node in the graph prior to the Cmt node is undoable and atomic - or -
60 1.1 oster * does not make changes to permanent state, the graph will fail atomically.
61 1.1 oster * If an error occurs after the Cmt node executes, the engine will roll-forward
62 1.1 oster * through the graph, blindly executing nodes until it reaches the end.
63 1.1 oster * If a graph reaches the end, it is assumed to have completed successfully.
64 1.1 oster *
65 1.1 oster * A graph has only 1 Cmt node.
66 1.1 oster *
67 1.1 oster */
68 1.1 oster
69 1.1 oster
70 1.1 oster /******************************************************************************
71 1.1 oster *
72 1.1 oster * The following wrappers map the standard DAG creation interface to the
73 1.1 oster * DAG creation routines. Additionally, these wrappers enable experimentation
74 1.1 oster * with new DAG structures by providing an extra level of indirection, allowing
75 1.1 oster * the DAG creation routines to be replaced at this single point.
76 1.1 oster */
77 1.1 oster
78 1.1 oster
79 1.29 perry void
80 1.13 oster rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81 1.13 oster RF_DagHeader_t *dag_h, void *bp,
82 1.13 oster RF_RaidAccessFlags_t flags,
83 1.13 oster RF_AllocListElem_t *allocList,
84 1.33 christos RF_IoType_t type)
85 1.1 oster {
86 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87 1.14 oster RF_IO_TYPE_WRITE);
88 1.1 oster }
89 1.1 oster
90 1.29 perry void
91 1.13 oster rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92 1.13 oster RF_DagHeader_t *dag_h, void *bp,
93 1.13 oster RF_RaidAccessFlags_t flags,
94 1.13 oster RF_AllocListElem_t *allocList,
95 1.33 christos RF_IoType_t type)
96 1.1 oster {
97 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98 1.14 oster RF_IO_TYPE_WRITE);
99 1.1 oster }
100 1.1 oster
101 1.29 perry void
102 1.13 oster rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103 1.13 oster RF_DagHeader_t *dag_h, void *bp,
104 1.13 oster RF_RaidAccessFlags_t flags,
105 1.13 oster RF_AllocListElem_t *allocList)
106 1.1 oster {
107 1.3 oster /* "normal" rollaway */
108 1.29 perry rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109 1.14 oster allocList, &rf_xorFuncs, NULL);
110 1.1 oster }
111 1.1 oster
112 1.29 perry void
113 1.13 oster rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114 1.13 oster RF_DagHeader_t *dag_h, void *bp,
115 1.13 oster RF_RaidAccessFlags_t flags,
116 1.13 oster RF_AllocListElem_t *allocList)
117 1.1 oster {
118 1.3 oster /* "normal" rollaway */
119 1.29 perry rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120 1.14 oster allocList, 1, rf_RegularXorFunc, RF_TRUE);
121 1.1 oster }
122 1.1 oster
123 1.1 oster
124 1.1 oster /******************************************************************************
125 1.1 oster *
126 1.1 oster * DAG creation code begins here
127 1.1 oster */
128 1.1 oster
129 1.1 oster
130 1.1 oster /******************************************************************************
131 1.1 oster *
132 1.1 oster * creates a DAG to perform a large-write operation:
133 1.1 oster *
134 1.1 oster * / Rod \ / Wnd \
135 1.1 oster * H -- block- Rod - Xor - Cmt - Wnd --- T
136 1.1 oster * \ Rod / \ Wnp /
137 1.1 oster * \[Wnq]/
138 1.1 oster *
139 1.1 oster * The XOR node also does the Q calculation in the P+Q architecture.
140 1.1 oster * All nodes are before the commit node (Cmt) are assumed to be atomic and
141 1.1 oster * undoable - or - they make no changes to permanent state.
142 1.1 oster *
143 1.1 oster * Rod = read old data
144 1.1 oster * Cmt = commit node
145 1.1 oster * Wnp = write new parity
146 1.1 oster * Wnd = write new data
147 1.1 oster * Wnq = write new "q"
148 1.1 oster * [] denotes optional segments in the graph
149 1.1 oster *
150 1.1 oster * Parameters: raidPtr - description of the physical array
151 1.1 oster * asmap - logical & physical addresses for this access
152 1.1 oster * bp - buffer ptr (holds write data)
153 1.3 oster * flags - general flags (e.g. disk locking)
154 1.1 oster * allocList - list of memory allocated in DAG creation
155 1.1 oster * nfaults - number of faults array can tolerate
156 1.1 oster * (equal to # redundancy units in stripe)
157 1.1 oster * redfuncs - list of redundancy generating functions
158 1.1 oster *
159 1.1 oster *****************************************************************************/
160 1.1 oster
161 1.29 perry void
162 1.13 oster rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
163 1.33 christos RF_DagHeader_t *dag_h, void *bp,
164 1.33 christos RF_RaidAccessFlags_t flags,
165 1.13 oster RF_AllocListElem_t *allocList,
166 1.13 oster int nfaults, int (*redFunc) (RF_DagNode_t *),
167 1.13 oster int allowBufferRecycle)
168 1.1 oster {
169 1.22 oster RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
170 1.33.104.1 rmind RF_DagNode_t *blockNode, *commitNode, *termNode;
171 1.33.104.1 rmind #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
172 1.33.104.1 rmind RF_DagNode_t *wnqNode;
173 1.33.104.1 rmind #endif
174 1.3 oster int nWndNodes, nRodNodes, i, nodeNum, asmNum;
175 1.3 oster RF_AccessStripeMapHeader_t *new_asm_h[2];
176 1.3 oster RF_StripeNum_t parityStripeID;
177 1.3 oster char *sosBuffer, *eosBuffer;
178 1.3 oster RF_ReconUnitNum_t which_ru;
179 1.3 oster RF_RaidLayout_t *layoutPtr;
180 1.3 oster RF_PhysDiskAddr_t *pda;
181 1.3 oster
182 1.3 oster layoutPtr = &(raidPtr->Layout);
183 1.29 perry parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
184 1.14 oster asmap->raidAddress,
185 1.14 oster &which_ru);
186 1.3 oster
187 1.19 oster #if RF_DEBUG_DAG
188 1.3 oster if (rf_dagDebug) {
189 1.3 oster printf("[Creating large-write DAG]\n");
190 1.3 oster }
191 1.19 oster #endif
192 1.3 oster dag_h->creator = "LargeWriteDAG";
193 1.3 oster
194 1.3 oster dag_h->numCommitNodes = 1;
195 1.3 oster dag_h->numCommits = 0;
196 1.3 oster dag_h->numSuccedents = 1;
197 1.3 oster
198 1.3 oster /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */
199 1.3 oster nWndNodes = asmap->numStripeUnitsAccessed;
200 1.22 oster
201 1.22 oster for (i = 0; i < nWndNodes; i++) {
202 1.22 oster tmpNode = rf_AllocDAGNode();
203 1.22 oster tmpNode->list_next = dag_h->nodes;
204 1.22 oster dag_h->nodes = tmpNode;
205 1.22 oster }
206 1.22 oster wndNodes = dag_h->nodes;
207 1.22 oster
208 1.22 oster xorNode = rf_AllocDAGNode();
209 1.22 oster xorNode->list_next = dag_h->nodes;
210 1.22 oster dag_h->nodes = xorNode;
211 1.22 oster
212 1.22 oster wnpNode = rf_AllocDAGNode();
213 1.22 oster wnpNode->list_next = dag_h->nodes;
214 1.22 oster dag_h->nodes = wnpNode;
215 1.22 oster
216 1.22 oster blockNode = rf_AllocDAGNode();
217 1.22 oster blockNode->list_next = dag_h->nodes;
218 1.22 oster dag_h->nodes = blockNode;
219 1.22 oster
220 1.22 oster commitNode = rf_AllocDAGNode();
221 1.22 oster commitNode->list_next = dag_h->nodes;
222 1.22 oster dag_h->nodes = commitNode;
223 1.22 oster
224 1.22 oster termNode = rf_AllocDAGNode();
225 1.22 oster termNode->list_next = dag_h->nodes;
226 1.22 oster dag_h->nodes = termNode;
227 1.22 oster
228 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
229 1.3 oster if (nfaults == 2) {
230 1.22 oster wnqNode = rf_AllocDAGNode();
231 1.3 oster } else {
232 1.3 oster wnqNode = NULL;
233 1.3 oster }
234 1.20 oster #endif
235 1.29 perry rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
236 1.29 perry new_asm_h, &nRodNodes, &sosBuffer,
237 1.14 oster &eosBuffer, allocList);
238 1.3 oster if (nRodNodes > 0) {
239 1.22 oster for (i = 0; i < nRodNodes; i++) {
240 1.22 oster tmpNode = rf_AllocDAGNode();
241 1.22 oster tmpNode->list_next = dag_h->nodes;
242 1.22 oster dag_h->nodes = tmpNode;
243 1.22 oster }
244 1.22 oster rodNodes = dag_h->nodes;
245 1.3 oster } else {
246 1.3 oster rodNodes = NULL;
247 1.3 oster }
248 1.3 oster
249 1.3 oster /* begin node initialization */
250 1.3 oster if (nRodNodes > 0) {
251 1.29 perry rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
252 1.29 perry rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
253 1.14 oster dag_h, "Nil", allocList);
254 1.3 oster } else {
255 1.29 perry rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
256 1.29 perry rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
257 1.14 oster dag_h, "Nil", allocList);
258 1.3 oster }
259 1.3 oster
260 1.29 perry rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
261 1.29 perry rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
262 1.14 oster dag_h, "Cmt", allocList);
263 1.29 perry rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
264 1.29 perry rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
265 1.14 oster dag_h, "Trm", allocList);
266 1.3 oster
267 1.3 oster /* initialize the Rod nodes */
268 1.22 oster tmpNode = rodNodes;
269 1.3 oster for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
270 1.3 oster if (new_asm_h[asmNum]) {
271 1.3 oster pda = new_asm_h[asmNum]->stripeMap->physInfo;
272 1.3 oster while (pda) {
273 1.29 perry rf_InitNode(tmpNode, rf_wait,
274 1.14 oster RF_FALSE, rf_DiskReadFunc,
275 1.29 perry rf_DiskReadUndoFunc,
276 1.29 perry rf_GenericWakeupFunc,
277 1.14 oster 1, 1, 4, 0, dag_h,
278 1.14 oster "Rod", allocList);
279 1.22 oster tmpNode->params[0].p = pda;
280 1.22 oster tmpNode->params[1].p = pda->bufPtr;
281 1.22 oster tmpNode->params[2].v = parityStripeID;
282 1.22 oster tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
283 1.17 oster which_ru);
284 1.3 oster nodeNum++;
285 1.3 oster pda = pda->next;
286 1.22 oster tmpNode = tmpNode->list_next;
287 1.3 oster }
288 1.3 oster }
289 1.3 oster }
290 1.3 oster RF_ASSERT(nodeNum == nRodNodes);
291 1.3 oster
292 1.3 oster /* initialize the wnd nodes */
293 1.3 oster pda = asmap->physInfo;
294 1.22 oster tmpNode = wndNodes;
295 1.3 oster for (i = 0; i < nWndNodes; i++) {
296 1.29 perry rf_InitNode(tmpNode, rf_wait, RF_FALSE,
297 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
298 1.29 perry rf_GenericWakeupFunc, 1, 1, 4, 0,
299 1.14 oster dag_h, "Wnd", allocList);
300 1.3 oster RF_ASSERT(pda != NULL);
301 1.22 oster tmpNode->params[0].p = pda;
302 1.22 oster tmpNode->params[1].p = pda->bufPtr;
303 1.22 oster tmpNode->params[2].v = parityStripeID;
304 1.22 oster tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
305 1.3 oster pda = pda->next;
306 1.22 oster tmpNode = tmpNode->list_next;
307 1.3 oster }
308 1.3 oster
309 1.3 oster /* initialize the redundancy node */
310 1.3 oster if (nRodNodes > 0) {
311 1.29 perry rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
312 1.14 oster rf_NullNodeUndoFunc, NULL, 1,
313 1.29 perry nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
314 1.14 oster nfaults, dag_h, "Xr ", allocList);
315 1.3 oster } else {
316 1.29 perry rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
317 1.14 oster rf_NullNodeUndoFunc, NULL, 1,
318 1.29 perry 1, 2 * (nWndNodes + nRodNodes) + 1,
319 1.14 oster nfaults, dag_h, "Xr ", allocList);
320 1.3 oster }
321 1.3 oster xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
322 1.22 oster tmpNode = wndNodes;
323 1.3 oster for (i = 0; i < nWndNodes; i++) {
324 1.14 oster /* pda */
325 1.22 oster xorNode->params[2 * i + 0] = tmpNode->params[0];
326 1.29 perry /* buf ptr */
327 1.22 oster xorNode->params[2 * i + 1] = tmpNode->params[1];
328 1.22 oster tmpNode = tmpNode->list_next;
329 1.3 oster }
330 1.22 oster tmpNode = rodNodes;
331 1.3 oster for (i = 0; i < nRodNodes; i++) {
332 1.14 oster /* pda */
333 1.22 oster xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
334 1.14 oster /* buf ptr */
335 1.22 oster xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
336 1.22 oster tmpNode = tmpNode->list_next;
337 1.3 oster }
338 1.3 oster /* xor node needs to get at RAID information */
339 1.3 oster xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
340 1.3 oster
341 1.3 oster /*
342 1.14 oster * Look for an Rod node that reads a complete SU. If none,
343 1.14 oster * alloc a buffer to receive the parity info. Note that we
344 1.14 oster * can't use a new data buffer because it will not have gotten
345 1.14 oster * written when the xor occurs. */
346 1.3 oster if (allowBufferRecycle) {
347 1.22 oster tmpNode = rodNodes;
348 1.3 oster for (i = 0; i < nRodNodes; i++) {
349 1.22 oster if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
350 1.3 oster break;
351 1.22 oster tmpNode = tmpNode->list_next;
352 1.3 oster }
353 1.3 oster }
354 1.3 oster if ((!allowBufferRecycle) || (i == nRodNodes)) {
355 1.27 oster xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
356 1.3 oster } else {
357 1.22 oster /* this works because the only way we get here is if
358 1.22 oster allowBufferRecycle is true and we went through the
359 1.22 oster above for loop, and exited via the break before
360 1.22 oster i==nRodNodes was true. That means tmpNode will
361 1.22 oster still point to a valid node -- the one we want for
362 1.22 oster here! */
363 1.22 oster xorNode->results[0] = tmpNode->params[1].p;
364 1.3 oster }
365 1.3 oster
366 1.3 oster /* initialize the Wnp node */
367 1.29 perry rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
368 1.29 perry rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
369 1.14 oster dag_h, "Wnp", allocList);
370 1.3 oster wnpNode->params[0].p = asmap->parityInfo;
371 1.3 oster wnpNode->params[1].p = xorNode->results[0];
372 1.3 oster wnpNode->params[2].v = parityStripeID;
373 1.17 oster wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
374 1.3 oster /* parityInfo must describe entire parity unit */
375 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
376 1.3 oster
377 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
378 1.3 oster if (nfaults == 2) {
379 1.3 oster /*
380 1.3 oster * We never try to recycle a buffer for the Q calcuation
381 1.3 oster * in addition to the parity. This would cause two buffers
382 1.3 oster * to get smashed during the P and Q calculation, guaranteeing
383 1.3 oster * one would be wrong.
384 1.3 oster */
385 1.12 oster RF_MallocAndAdd(xorNode->results[1],
386 1.12 oster rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
387 1.12 oster (void *), allocList);
388 1.29 perry rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
389 1.29 perry rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
390 1.14 oster 1, 1, 4, 0, dag_h, "Wnq", allocList);
391 1.3 oster wnqNode->params[0].p = asmap->qInfo;
392 1.3 oster wnqNode->params[1].p = xorNode->results[1];
393 1.3 oster wnqNode->params[2].v = parityStripeID;
394 1.17 oster wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
395 1.3 oster /* parityInfo must describe entire parity unit */
396 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
397 1.3 oster }
398 1.20 oster #endif
399 1.3 oster /*
400 1.3 oster * Connect nodes to form graph.
401 1.3 oster */
402 1.3 oster
403 1.3 oster /* connect dag header to block node */
404 1.3 oster RF_ASSERT(blockNode->numAntecedents == 0);
405 1.3 oster dag_h->succedents[0] = blockNode;
406 1.3 oster
407 1.3 oster if (nRodNodes > 0) {
408 1.3 oster /* connect the block node to the Rod nodes */
409 1.3 oster RF_ASSERT(blockNode->numSuccedents == nRodNodes);
410 1.3 oster RF_ASSERT(xorNode->numAntecedents == nRodNodes);
411 1.22 oster tmpNode = rodNodes;
412 1.3 oster for (i = 0; i < nRodNodes; i++) {
413 1.28 oster RF_ASSERT(tmpNode->numAntecedents == 1);
414 1.22 oster blockNode->succedents[i] = tmpNode;
415 1.22 oster tmpNode->antecedents[0] = blockNode;
416 1.22 oster tmpNode->antType[0] = rf_control;
417 1.3 oster
418 1.3 oster /* connect the Rod nodes to the Xor node */
419 1.28 oster RF_ASSERT(tmpNode->numSuccedents == 1);
420 1.22 oster tmpNode->succedents[0] = xorNode;
421 1.22 oster xorNode->antecedents[i] = tmpNode;
422 1.3 oster xorNode->antType[i] = rf_trueData;
423 1.22 oster tmpNode = tmpNode->list_next;
424 1.3 oster }
425 1.3 oster } else {
426 1.3 oster /* connect the block node to the Xor node */
427 1.3 oster RF_ASSERT(blockNode->numSuccedents == 1);
428 1.3 oster RF_ASSERT(xorNode->numAntecedents == 1);
429 1.3 oster blockNode->succedents[0] = xorNode;
430 1.3 oster xorNode->antecedents[0] = blockNode;
431 1.3 oster xorNode->antType[0] = rf_control;
432 1.3 oster }
433 1.3 oster
434 1.3 oster /* connect the xor node to the commit node */
435 1.3 oster RF_ASSERT(xorNode->numSuccedents == 1);
436 1.3 oster RF_ASSERT(commitNode->numAntecedents == 1);
437 1.3 oster xorNode->succedents[0] = commitNode;
438 1.3 oster commitNode->antecedents[0] = xorNode;
439 1.3 oster commitNode->antType[0] = rf_control;
440 1.3 oster
441 1.3 oster /* connect the commit node to the write nodes */
442 1.3 oster RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
443 1.22 oster tmpNode = wndNodes;
444 1.3 oster for (i = 0; i < nWndNodes; i++) {
445 1.3 oster RF_ASSERT(wndNodes->numAntecedents == 1);
446 1.22 oster commitNode->succedents[i] = tmpNode;
447 1.22 oster tmpNode->antecedents[0] = commitNode;
448 1.22 oster tmpNode->antType[0] = rf_control;
449 1.22 oster tmpNode = tmpNode->list_next;
450 1.3 oster }
451 1.3 oster RF_ASSERT(wnpNode->numAntecedents == 1);
452 1.3 oster commitNode->succedents[nWndNodes] = wnpNode;
453 1.3 oster wnpNode->antecedents[0] = commitNode;
454 1.3 oster wnpNode->antType[0] = rf_trueData;
455 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
456 1.3 oster if (nfaults == 2) {
457 1.3 oster RF_ASSERT(wnqNode->numAntecedents == 1);
458 1.3 oster commitNode->succedents[nWndNodes + 1] = wnqNode;
459 1.3 oster wnqNode->antecedents[0] = commitNode;
460 1.3 oster wnqNode->antType[0] = rf_trueData;
461 1.3 oster }
462 1.20 oster #endif
463 1.3 oster /* connect the write nodes to the term node */
464 1.3 oster RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
465 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
466 1.22 oster tmpNode = wndNodes;
467 1.3 oster for (i = 0; i < nWndNodes; i++) {
468 1.3 oster RF_ASSERT(wndNodes->numSuccedents == 1);
469 1.22 oster tmpNode->succedents[0] = termNode;
470 1.22 oster termNode->antecedents[i] = tmpNode;
471 1.3 oster termNode->antType[i] = rf_control;
472 1.22 oster tmpNode = tmpNode->list_next;
473 1.3 oster }
474 1.3 oster RF_ASSERT(wnpNode->numSuccedents == 1);
475 1.3 oster wnpNode->succedents[0] = termNode;
476 1.3 oster termNode->antecedents[nWndNodes] = wnpNode;
477 1.3 oster termNode->antType[nWndNodes] = rf_control;
478 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
479 1.3 oster if (nfaults == 2) {
480 1.3 oster RF_ASSERT(wnqNode->numSuccedents == 1);
481 1.3 oster wnqNode->succedents[0] = termNode;
482 1.3 oster termNode->antecedents[nWndNodes + 1] = wnqNode;
483 1.3 oster termNode->antType[nWndNodes + 1] = rf_control;
484 1.3 oster }
485 1.20 oster #endif
486 1.1 oster }
487 1.1 oster /******************************************************************************
488 1.1 oster *
489 1.1 oster * creates a DAG to perform a small-write operation (either raid 5 or pq),
490 1.1 oster * which is as follows:
491 1.1 oster *
492 1.1 oster * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
493 1.1 oster * \- Rod X / \----> Wnd [Und]-/
494 1.1 oster * [\- Rod X / \---> Wnd [Und]-/]
495 1.1 oster * [\- Roq -> Q / \--> Wnq [Unq]-/]
496 1.1 oster *
497 1.1 oster * Rop = read old parity
498 1.1 oster * Rod = read old data
499 1.1 oster * Roq = read old "q"
500 1.1 oster * Cmt = commit node
501 1.1 oster * Und = unlock data disk
502 1.1 oster * Unp = unlock parity disk
503 1.1 oster * Unq = unlock q disk
504 1.1 oster * Wnp = write new parity
505 1.1 oster * Wnd = write new data
506 1.1 oster * Wnq = write new "q"
507 1.1 oster * [ ] denotes optional segments in the graph
508 1.1 oster *
509 1.1 oster * Parameters: raidPtr - description of the physical array
510 1.1 oster * asmap - logical & physical addresses for this access
511 1.1 oster * bp - buffer ptr (holds write data)
512 1.3 oster * flags - general flags (e.g. disk locking)
513 1.1 oster * allocList - list of memory allocated in DAG creation
514 1.1 oster * pfuncs - list of parity generating functions
515 1.1 oster * qfuncs - list of q generating functions
516 1.1 oster *
517 1.1 oster * A null qfuncs indicates single fault tolerant
518 1.1 oster *****************************************************************************/
519 1.1 oster
520 1.29 perry void
521 1.13 oster rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
522 1.33 christos RF_DagHeader_t *dag_h, void *bp,
523 1.33 christos RF_RaidAccessFlags_t flags,
524 1.13 oster RF_AllocListElem_t *allocList,
525 1.13 oster const RF_RedFuncs_t *pfuncs,
526 1.13 oster const RF_RedFuncs_t *qfuncs)
527 1.1 oster {
528 1.33.104.1 rmind RF_DagNode_t *readDataNodes, *readParityNodes, *termNode;
529 1.22 oster RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
530 1.33.104.1 rmind RF_DagNode_t *xorNodes, *blockNode, *commitNode;
531 1.33.104.1 rmind RF_DagNode_t *writeDataNodes, *writeParityNodes;
532 1.33.104.1 rmind RF_DagNode_t *tmpxorNode, *tmpwriteDataNode;
533 1.22 oster RF_DagNode_t *tmpwriteParityNode;
534 1.22 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
535 1.33.104.1 rmind RF_DagNode_t *tmpwriteQNode, *tmpreadQNode, *tmpqNode, *readQNodes,
536 1.33.104.1 rmind *writeQNodes, *qNodes;
537 1.22 oster #endif
538 1.33.104.1 rmind int i, j, nNodes;
539 1.3 oster RF_ReconUnitNum_t which_ru;
540 1.3 oster int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
541 1.33.104.1 rmind int (*qfunc) (RF_DagNode_t *) __unused;
542 1.3 oster int numDataNodes, numParityNodes;
543 1.3 oster RF_StripeNum_t parityStripeID;
544 1.3 oster RF_PhysDiskAddr_t *pda;
545 1.33.104.1 rmind const char *name, *qname __unused;
546 1.3 oster long nfaults;
547 1.3 oster
548 1.3 oster nfaults = qfuncs ? 2 : 1;
549 1.3 oster
550 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
551 1.3 oster asmap->raidAddress, &which_ru);
552 1.3 oster pda = asmap->physInfo;
553 1.3 oster numDataNodes = asmap->numStripeUnitsAccessed;
554 1.3 oster numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
555 1.3 oster
556 1.19 oster #if RF_DEBUG_DAG
557 1.3 oster if (rf_dagDebug) {
558 1.3 oster printf("[Creating small-write DAG]\n");
559 1.3 oster }
560 1.19 oster #endif
561 1.3 oster RF_ASSERT(numDataNodes > 0);
562 1.3 oster dag_h->creator = "SmallWriteDAG";
563 1.3 oster
564 1.3 oster dag_h->numCommitNodes = 1;
565 1.3 oster dag_h->numCommits = 0;
566 1.3 oster dag_h->numSuccedents = 1;
567 1.3 oster
568 1.3 oster /*
569 1.3 oster * DAG creation occurs in four steps:
570 1.3 oster * 1. count the number of nodes in the DAG
571 1.3 oster * 2. create the nodes
572 1.3 oster * 3. initialize the nodes
573 1.3 oster * 4. connect the nodes
574 1.3 oster */
575 1.3 oster
576 1.3 oster /*
577 1.3 oster * Step 1. compute number of nodes in the graph
578 1.3 oster */
579 1.3 oster
580 1.14 oster /* number of nodes: a read and write for each data unit a
581 1.14 oster * redundancy computation node for each parity node (nfaults *
582 1.14 oster * nparity) a read and write for each parity unit a block and
583 1.14 oster * commit node (2) a terminate node if atomic RMW an unlock
584 1.33.104.1 rmind * node for each data unit, redundancy unit
585 1.33.104.1 rmind * totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
586 1.33.104.1 rmind * + (nfaults * 2 * numParityNodes) + 3;
587 1.33.104.1 rmind */
588 1.33.104.1 rmind
589 1.3 oster /*
590 1.3 oster * Step 2. create the nodes
591 1.3 oster */
592 1.22 oster
593 1.22 oster blockNode = rf_AllocDAGNode();
594 1.22 oster blockNode->list_next = dag_h->nodes;
595 1.22 oster dag_h->nodes = blockNode;
596 1.22 oster
597 1.22 oster commitNode = rf_AllocDAGNode();
598 1.22 oster commitNode->list_next = dag_h->nodes;
599 1.22 oster dag_h->nodes = commitNode;
600 1.22 oster
601 1.22 oster for (i = 0; i < numDataNodes; i++) {
602 1.22 oster tmpNode = rf_AllocDAGNode();
603 1.22 oster tmpNode->list_next = dag_h->nodes;
604 1.22 oster dag_h->nodes = tmpNode;
605 1.22 oster }
606 1.22 oster readDataNodes = dag_h->nodes;
607 1.22 oster
608 1.22 oster for (i = 0; i < numParityNodes; i++) {
609 1.22 oster tmpNode = rf_AllocDAGNode();
610 1.22 oster tmpNode->list_next = dag_h->nodes;
611 1.22 oster dag_h->nodes = tmpNode;
612 1.22 oster }
613 1.22 oster readParityNodes = dag_h->nodes;
614 1.29 perry
615 1.22 oster for (i = 0; i < numDataNodes; i++) {
616 1.22 oster tmpNode = rf_AllocDAGNode();
617 1.22 oster tmpNode->list_next = dag_h->nodes;
618 1.22 oster dag_h->nodes = tmpNode;
619 1.22 oster }
620 1.22 oster writeDataNodes = dag_h->nodes;
621 1.22 oster
622 1.22 oster for (i = 0; i < numParityNodes; i++) {
623 1.22 oster tmpNode = rf_AllocDAGNode();
624 1.22 oster tmpNode->list_next = dag_h->nodes;
625 1.22 oster dag_h->nodes = tmpNode;
626 1.22 oster }
627 1.22 oster writeParityNodes = dag_h->nodes;
628 1.22 oster
629 1.22 oster for (i = 0; i < numParityNodes; i++) {
630 1.22 oster tmpNode = rf_AllocDAGNode();
631 1.22 oster tmpNode->list_next = dag_h->nodes;
632 1.22 oster dag_h->nodes = tmpNode;
633 1.22 oster }
634 1.22 oster xorNodes = dag_h->nodes;
635 1.22 oster
636 1.22 oster termNode = rf_AllocDAGNode();
637 1.22 oster termNode->list_next = dag_h->nodes;
638 1.22 oster dag_h->nodes = termNode;
639 1.16 oster
640 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
641 1.3 oster if (nfaults == 2) {
642 1.22 oster for (i = 0; i < numParityNodes; i++) {
643 1.22 oster tmpNode = rf_AllocDAGNode();
644 1.22 oster tmpNode->list_next = dag_h->nodes;
645 1.22 oster dag_h->nodes = tmpNode;
646 1.22 oster }
647 1.22 oster readQNodes = dag_h->nodes;
648 1.22 oster
649 1.22 oster for (i = 0; i < numParityNodes; i++) {
650 1.22 oster tmpNode = rf_AllocDAGNode();
651 1.22 oster tmpNode->list_next = dag_h->nodes;
652 1.22 oster dag_h->nodes = tmpNode;
653 1.22 oster }
654 1.22 oster writeQNodes = dag_h->nodes;
655 1.22 oster
656 1.22 oster for (i = 0; i < numParityNodes; i++) {
657 1.22 oster tmpNode = rf_AllocDAGNode();
658 1.22 oster tmpNode->list_next = dag_h->nodes;
659 1.22 oster dag_h->nodes = tmpNode;
660 1.22 oster }
661 1.22 oster qNodes = dag_h->nodes;
662 1.3 oster } else {
663 1.18 oster readQNodes = writeQNodes = qNodes = NULL;
664 1.3 oster }
665 1.20 oster #endif
666 1.3 oster
667 1.3 oster /*
668 1.3 oster * Step 3. initialize the nodes
669 1.3 oster */
670 1.3 oster /* initialize block node (Nil) */
671 1.3 oster nNodes = numDataNodes + (nfaults * numParityNodes);
672 1.29 perry rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
673 1.29 perry rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
674 1.14 oster dag_h, "Nil", allocList);
675 1.3 oster
676 1.3 oster /* initialize commit node (Cmt) */
677 1.29 perry rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
678 1.29 perry rf_NullNodeUndoFunc, NULL, nNodes,
679 1.14 oster (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
680 1.3 oster
681 1.3 oster /* initialize terminate node (Trm) */
682 1.29 perry rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
683 1.29 perry rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
684 1.14 oster dag_h, "Trm", allocList);
685 1.3 oster
686 1.3 oster /* initialize nodes which read old data (Rod) */
687 1.22 oster tmpreadDataNode = readDataNodes;
688 1.3 oster for (i = 0; i < numDataNodes; i++) {
689 1.29 perry rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
690 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
691 1.29 perry rf_GenericWakeupFunc, (nfaults * numParityNodes),
692 1.14 oster 1, 4, 0, dag_h, "Rod", allocList);
693 1.3 oster RF_ASSERT(pda != NULL);
694 1.3 oster /* physical disk addr desc */
695 1.22 oster tmpreadDataNode->params[0].p = pda;
696 1.3 oster /* buffer to hold old data */
697 1.27 oster tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
698 1.22 oster tmpreadDataNode->params[2].v = parityStripeID;
699 1.22 oster tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
700 1.17 oster which_ru);
701 1.3 oster pda = pda->next;
702 1.22 oster for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
703 1.22 oster tmpreadDataNode->propList[j] = NULL;
704 1.3 oster }
705 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
706 1.3 oster }
707 1.3 oster
708 1.3 oster /* initialize nodes which read old parity (Rop) */
709 1.3 oster pda = asmap->parityInfo;
710 1.3 oster i = 0;
711 1.22 oster tmpreadParityNode = readParityNodes;
712 1.3 oster for (i = 0; i < numParityNodes; i++) {
713 1.3 oster RF_ASSERT(pda != NULL);
714 1.29 perry rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
715 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
716 1.29 perry rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
717 1.14 oster dag_h, "Rop", allocList);
718 1.22 oster tmpreadParityNode->params[0].p = pda;
719 1.3 oster /* buffer to hold old parity */
720 1.27 oster tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
721 1.22 oster tmpreadParityNode->params[2].v = parityStripeID;
722 1.22 oster tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
723 1.17 oster which_ru);
724 1.3 oster pda = pda->next;
725 1.22 oster for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
726 1.22 oster tmpreadParityNode->propList[0] = NULL;
727 1.3 oster }
728 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
729 1.3 oster }
730 1.3 oster
731 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
732 1.3 oster /* initialize nodes which read old Q (Roq) */
733 1.3 oster if (nfaults == 2) {
734 1.3 oster pda = asmap->qInfo;
735 1.22 oster tmpreadQNode = readQNodes;
736 1.3 oster for (i = 0; i < numParityNodes; i++) {
737 1.3 oster RF_ASSERT(pda != NULL);
738 1.29 perry rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
739 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
740 1.29 perry rf_GenericWakeupFunc, numParityNodes,
741 1.14 oster 1, 4, 0, dag_h, "Roq", allocList);
742 1.22 oster tmpreadQNode->params[0].p = pda;
743 1.3 oster /* buffer to hold old Q */
744 1.24 oster tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
745 1.24 oster pda->numSector << raidPtr->logBytesPerSector);
746 1.22 oster tmpreadQNode->params[2].v = parityStripeID;
747 1.22 oster tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
748 1.17 oster which_ru);
749 1.3 oster pda = pda->next;
750 1.22 oster for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
751 1.22 oster tmpreadQNode->propList[0] = NULL;
752 1.3 oster }
753 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
754 1.3 oster }
755 1.3 oster }
756 1.20 oster #endif
757 1.3 oster /* initialize nodes which write new data (Wnd) */
758 1.3 oster pda = asmap->physInfo;
759 1.22 oster tmpwriteDataNode = writeDataNodes;
760 1.3 oster for (i = 0; i < numDataNodes; i++) {
761 1.3 oster RF_ASSERT(pda != NULL);
762 1.29 perry rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
763 1.29 perry rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
764 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
765 1.14 oster "Wnd", allocList);
766 1.3 oster /* physical disk addr desc */
767 1.22 oster tmpwriteDataNode->params[0].p = pda;
768 1.3 oster /* buffer holding new data to be written */
769 1.22 oster tmpwriteDataNode->params[1].p = pda->bufPtr;
770 1.22 oster tmpwriteDataNode->params[2].v = parityStripeID;
771 1.22 oster tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
772 1.17 oster which_ru);
773 1.3 oster pda = pda->next;
774 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
775 1.3 oster }
776 1.3 oster
777 1.3 oster /*
778 1.3 oster * Initialize nodes which compute new parity and Q.
779 1.3 oster */
780 1.3 oster /*
781 1.3 oster * We use the simple XOR func in the double-XOR case, and when
782 1.14 oster * we're accessing only a portion of one stripe unit. The
783 1.14 oster * distinction between the two is that the regular XOR func
784 1.14 oster * assumes that the targbuf is a full SU in size, and examines
785 1.14 oster * the pda associated with the buffer to decide where within
786 1.14 oster * the buffer to XOR the data, whereas the simple XOR func
787 1.14 oster * just XORs the data into the start of the buffer. */
788 1.3 oster if ((numParityNodes == 2) || ((numDataNodes == 1)
789 1.29 perry && (asmap->totalSectorsAccessed <
790 1.14 oster raidPtr->Layout.sectorsPerStripeUnit))) {
791 1.3 oster func = pfuncs->simple;
792 1.3 oster undoFunc = rf_NullNodeUndoFunc;
793 1.3 oster name = pfuncs->SimpleName;
794 1.3 oster if (qfuncs) {
795 1.3 oster qfunc = qfuncs->simple;
796 1.3 oster qname = qfuncs->SimpleName;
797 1.3 oster } else {
798 1.3 oster qfunc = NULL;
799 1.3 oster qname = NULL;
800 1.3 oster }
801 1.3 oster } else {
802 1.3 oster func = pfuncs->regular;
803 1.3 oster undoFunc = rf_NullNodeUndoFunc;
804 1.3 oster name = pfuncs->RegularName;
805 1.3 oster if (qfuncs) {
806 1.3 oster qfunc = qfuncs->regular;
807 1.3 oster qname = qfuncs->RegularName;
808 1.3 oster } else {
809 1.3 oster qfunc = NULL;
810 1.3 oster qname = NULL;
811 1.3 oster }
812 1.3 oster }
813 1.3 oster /*
814 1.3 oster * Initialize the xor nodes: params are {pda,buf}
815 1.3 oster * from {Rod,Wnd,Rop} nodes, and raidPtr
816 1.3 oster */
817 1.3 oster if (numParityNodes == 2) {
818 1.3 oster /* double-xor case */
819 1.22 oster tmpxorNode = xorNodes;
820 1.22 oster tmpreadDataNode = readDataNodes;
821 1.22 oster tmpreadParityNode = readParityNodes;
822 1.22 oster tmpwriteDataNode = writeDataNodes;
823 1.33.104.1 rmind #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
824 1.22 oster tmpqNode = qNodes;
825 1.22 oster tmpreadQNode = readQNodes;
826 1.33.104.1 rmind #endif
827 1.3 oster for (i = 0; i < numParityNodes; i++) {
828 1.3 oster /* note: no wakeup func for xor */
829 1.29 perry rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
830 1.29 perry undoFunc, NULL, 1,
831 1.29 perry (numDataNodes + numParityNodes),
832 1.14 oster 7, 1, dag_h, name, allocList);
833 1.22 oster tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
834 1.22 oster tmpxorNode->params[0] = tmpreadDataNode->params[0];
835 1.22 oster tmpxorNode->params[1] = tmpreadDataNode->params[1];
836 1.22 oster tmpxorNode->params[2] = tmpreadParityNode->params[0];
837 1.22 oster tmpxorNode->params[3] = tmpreadParityNode->params[1];
838 1.22 oster tmpxorNode->params[4] = tmpwriteDataNode->params[0];
839 1.22 oster tmpxorNode->params[5] = tmpwriteDataNode->params[1];
840 1.22 oster tmpxorNode->params[6].p = raidPtr;
841 1.3 oster /* use old parity buf as target buf */
842 1.22 oster tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
843 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
844 1.3 oster if (nfaults == 2) {
845 1.3 oster /* note: no wakeup func for qor */
846 1.29 perry rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
847 1.14 oster qfunc, undoFunc, NULL, 1,
848 1.29 perry (numDataNodes + numParityNodes),
849 1.14 oster 7, 1, dag_h, qname, allocList);
850 1.22 oster tmpqNode->params[0] = tmpreadDataNode->params[0];
851 1.22 oster tmpqNode->params[1] = tmpreadDataNode->params[1];
852 1.22 oster tmpqNode->params[2] = tmpreadQNode->.params[0];
853 1.22 oster tmpqNode->params[3] = tmpreadQNode->params[1];
854 1.22 oster tmpqNode->params[4] = tmpwriteDataNode->params[0];
855 1.22 oster tmpqNode->params[5] = tmpwriteDataNode->params[1];
856 1.22 oster tmpqNode->params[6].p = raidPtr;
857 1.3 oster /* use old Q buf as target buf */
858 1.22 oster tmpqNode->results[0] = tmpreadQNode->params[1].p;
859 1.22 oster tmpqNode = tmpqNode->list_next;
860 1.22 oster tmpreadQNodes = tmpreadQNodes->list_next;
861 1.3 oster }
862 1.20 oster #endif
863 1.22 oster tmpxorNode = tmpxorNode->list_next;
864 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
865 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
866 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
867 1.3 oster }
868 1.3 oster } else {
869 1.3 oster /* there is only one xor node in this case */
870 1.29 perry rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
871 1.14 oster undoFunc, NULL, 1, (numDataNodes + numParityNodes),
872 1.29 perry (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
873 1.14 oster dag_h, name, allocList);
874 1.22 oster xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
875 1.22 oster tmpreadDataNode = readDataNodes;
876 1.29 perry for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
877 1.22 oster out the "+1" into the "deal with Rop separately below */
878 1.22 oster /* set up params related to Rod nodes */
879 1.22 oster xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
880 1.22 oster xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
881 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
882 1.22 oster }
883 1.22 oster /* deal with Rop separately */
884 1.22 oster xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0]; /* pda */
885 1.22 oster xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1]; /* buffer ptr */
886 1.22 oster
887 1.22 oster tmpwriteDataNode = writeDataNodes;
888 1.3 oster for (i = 0; i < numDataNodes; i++) {
889 1.3 oster /* set up params related to Wnd and Wnp nodes */
890 1.22 oster xorNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
891 1.22 oster tmpwriteDataNode->params[0];
892 1.22 oster xorNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
893 1.22 oster tmpwriteDataNode->params[1];
894 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
895 1.3 oster }
896 1.3 oster /* xor node needs to get at RAID information */
897 1.22 oster xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
898 1.22 oster xorNodes->results[0] = readParityNodes->params[1].p;
899 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
900 1.3 oster if (nfaults == 2) {
901 1.29 perry rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
902 1.14 oster undoFunc, NULL, 1,
903 1.14 oster (numDataNodes + numParityNodes),
904 1.14 oster (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
905 1.14 oster dag_h, qname, allocList);
906 1.22 oster tmpreadDataNode = readDataNodes;
907 1.3 oster for (i = 0; i < numDataNodes; i++) {
908 1.3 oster /* set up params related to Rod */
909 1.22 oster qNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
910 1.22 oster qNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
911 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
912 1.3 oster }
913 1.3 oster /* and read old q */
914 1.22 oster qNodes->params[2 * numDataNodes + 0] = /* pda */
915 1.22 oster readQNodes->params[0];
916 1.22 oster qNodes->params[2 * numDataNodes + 1] = /* buffer ptr */
917 1.22 oster readQNodes->params[1];
918 1.22 oster tmpwriteDataNode = writeDataNodes;
919 1.3 oster for (i = 0; i < numDataNodes; i++) {
920 1.3 oster /* set up params related to Wnd nodes */
921 1.22 oster qNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
922 1.22 oster tmpwriteDataNode->params[0];
923 1.22 oster qNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
924 1.22 oster tmpwriteDataNode->params[1];
925 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
926 1.3 oster }
927 1.3 oster /* xor node needs to get at RAID information */
928 1.22 oster qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
929 1.22 oster qNodes->results[0] = readQNodes->params[1].p;
930 1.3 oster }
931 1.20 oster #endif
932 1.3 oster }
933 1.3 oster
934 1.3 oster /* initialize nodes which write new parity (Wnp) */
935 1.3 oster pda = asmap->parityInfo;
936 1.22 oster tmpwriteParityNode = writeParityNodes;
937 1.22 oster tmpxorNode = xorNodes;
938 1.3 oster for (i = 0; i < numParityNodes; i++) {
939 1.29 perry rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
940 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
941 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
942 1.14 oster "Wnp", allocList);
943 1.3 oster RF_ASSERT(pda != NULL);
944 1.22 oster tmpwriteParityNode->params[0].p = pda; /* param 1 (bufPtr)
945 1.22 oster * filled in by xor node */
946 1.22 oster tmpwriteParityNode->params[1].p = tmpxorNode->results[0]; /* buffer pointer for
947 1.22 oster * parity write
948 1.22 oster * operation */
949 1.22 oster tmpwriteParityNode->params[2].v = parityStripeID;
950 1.22 oster tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
951 1.17 oster which_ru);
952 1.3 oster pda = pda->next;
953 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
954 1.22 oster tmpxorNode = tmpxorNode->list_next;
955 1.3 oster }
956 1.3 oster
957 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
958 1.3 oster /* initialize nodes which write new Q (Wnq) */
959 1.3 oster if (nfaults == 2) {
960 1.3 oster pda = asmap->qInfo;
961 1.22 oster tmpwriteQNode = writeQNodes;
962 1.22 oster tmpqNode = qNodes;
963 1.3 oster for (i = 0; i < numParityNodes; i++) {
964 1.29 perry rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
965 1.29 perry rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
966 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
967 1.14 oster "Wnq", allocList);
968 1.3 oster RF_ASSERT(pda != NULL);
969 1.22 oster tmpwriteQNode->params[0].p = pda; /* param 1 (bufPtr)
970 1.3 oster * filled in by xor node */
971 1.22 oster tmpwriteQNode->params[1].p = tmpqNode->results[0]; /* buffer pointer for
972 1.3 oster * parity write
973 1.3 oster * operation */
974 1.22 oster tmpwriteQNode->params[2].v = parityStripeID;
975 1.22 oster tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
976 1.17 oster which_ru);
977 1.3 oster pda = pda->next;
978 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
979 1.22 oster tmpqNode = tmpqNode->list_next;
980 1.3 oster }
981 1.3 oster }
982 1.20 oster #endif
983 1.3 oster /*
984 1.3 oster * Step 4. connect the nodes.
985 1.3 oster */
986 1.3 oster
987 1.3 oster /* connect header to block node */
988 1.3 oster dag_h->succedents[0] = blockNode;
989 1.3 oster
990 1.3 oster /* connect block node to read old data nodes */
991 1.3 oster RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
992 1.22 oster tmpreadDataNode = readDataNodes;
993 1.3 oster for (i = 0; i < numDataNodes; i++) {
994 1.22 oster blockNode->succedents[i] = tmpreadDataNode;
995 1.22 oster RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
996 1.22 oster tmpreadDataNode->antecedents[0] = blockNode;
997 1.22 oster tmpreadDataNode->antType[0] = rf_control;
998 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
999 1.3 oster }
1000 1.3 oster
1001 1.3 oster /* connect block node to read old parity nodes */
1002 1.22 oster tmpreadParityNode = readParityNodes;
1003 1.3 oster for (i = 0; i < numParityNodes; i++) {
1004 1.22 oster blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1005 1.22 oster RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1006 1.22 oster tmpreadParityNode->antecedents[0] = blockNode;
1007 1.22 oster tmpreadParityNode->antType[0] = rf_control;
1008 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1009 1.3 oster }
1010 1.3 oster
1011 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1012 1.3 oster /* connect block node to read old Q nodes */
1013 1.3 oster if (nfaults == 2) {
1014 1.22 oster tmpreadQNode = readQNodes;
1015 1.3 oster for (i = 0; i < numParityNodes; i++) {
1016 1.22 oster blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1017 1.22 oster RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1018 1.22 oster tmpreadQNode->antecedents[0] = blockNode;
1019 1.22 oster tmpreadQNode->antType[0] = rf_control;
1020 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
1021 1.3 oster }
1022 1.3 oster }
1023 1.20 oster #endif
1024 1.3 oster /* connect read old data nodes to xor nodes */
1025 1.22 oster tmpreadDataNode = readDataNodes;
1026 1.3 oster for (i = 0; i < numDataNodes; i++) {
1027 1.22 oster RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1028 1.22 oster tmpxorNode = xorNodes;
1029 1.3 oster for (j = 0; j < numParityNodes; j++) {
1030 1.22 oster RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1031 1.22 oster tmpreadDataNode->succedents[j] = tmpxorNode;
1032 1.22 oster tmpxorNode->antecedents[i] = tmpreadDataNode;
1033 1.22 oster tmpxorNode->antType[i] = rf_trueData;
1034 1.22 oster tmpxorNode = tmpxorNode->list_next;
1035 1.3 oster }
1036 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1037 1.3 oster }
1038 1.3 oster
1039 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1040 1.3 oster /* connect read old data nodes to q nodes */
1041 1.3 oster if (nfaults == 2) {
1042 1.22 oster tmpreadDataNode = readDataNodes;
1043 1.3 oster for (i = 0; i < numDataNodes; i++) {
1044 1.22 oster tmpqNode = qNodes;
1045 1.3 oster for (j = 0; j < numParityNodes; j++) {
1046 1.22 oster RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1047 1.22 oster tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1048 1.22 oster tmpqNode->antecedents[i] = tmpreadDataNode;
1049 1.22 oster tmpqNode->antType[i] = rf_trueData;
1050 1.22 oster tmpqNode = tmpqNode->list_next;
1051 1.3 oster }
1052 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1053 1.3 oster }
1054 1.3 oster }
1055 1.20 oster #endif
1056 1.3 oster /* connect read old parity nodes to xor nodes */
1057 1.22 oster tmpreadParityNode = readParityNodes;
1058 1.3 oster for (i = 0; i < numParityNodes; i++) {
1059 1.22 oster RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1060 1.22 oster tmpxorNode = xorNodes;
1061 1.3 oster for (j = 0; j < numParityNodes; j++) {
1062 1.22 oster tmpreadParityNode->succedents[j] = tmpxorNode;
1063 1.22 oster tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1064 1.22 oster tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1065 1.22 oster tmpxorNode = tmpxorNode->list_next;
1066 1.3 oster }
1067 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1068 1.3 oster }
1069 1.3 oster
1070 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1071 1.3 oster /* connect read old q nodes to q nodes */
1072 1.3 oster if (nfaults == 2) {
1073 1.22 oster tmpreadParityNode = readParityNodes;
1074 1.22 oster tmpreadQNode = readQNodes;
1075 1.3 oster for (i = 0; i < numParityNodes; i++) {
1076 1.22 oster RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1077 1.22 oster tmpqNode = qNodes;
1078 1.3 oster for (j = 0; j < numParityNodes; j++) {
1079 1.22 oster tmpreadQNode->succedents[j] = tmpqNode;
1080 1.22 oster tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1081 1.22 oster tmpqNode->antType[numDataNodes + i] = rf_trueData;
1082 1.22 oster tmpqNode = tmpqNode->list_next;
1083 1.3 oster }
1084 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1085 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
1086 1.3 oster }
1087 1.3 oster }
1088 1.20 oster #endif
1089 1.3 oster /* connect xor nodes to commit node */
1090 1.3 oster RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1091 1.22 oster tmpxorNode = xorNodes;
1092 1.3 oster for (i = 0; i < numParityNodes; i++) {
1093 1.22 oster RF_ASSERT(tmpxorNode->numSuccedents == 1);
1094 1.22 oster tmpxorNode->succedents[0] = commitNode;
1095 1.22 oster commitNode->antecedents[i] = tmpxorNode;
1096 1.3 oster commitNode->antType[i] = rf_control;
1097 1.22 oster tmpxorNode = tmpxorNode->list_next;
1098 1.3 oster }
1099 1.3 oster
1100 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1101 1.3 oster /* connect q nodes to commit node */
1102 1.3 oster if (nfaults == 2) {
1103 1.22 oster tmpqNode = qNodes;
1104 1.3 oster for (i = 0; i < numParityNodes; i++) {
1105 1.22 oster RF_ASSERT(tmpqNode->numSuccedents == 1);
1106 1.22 oster tmpqNode->succedents[0] = commitNode;
1107 1.22 oster commitNode->antecedents[i + numParityNodes] = tmpqNode;
1108 1.3 oster commitNode->antType[i + numParityNodes] = rf_control;
1109 1.22 oster tmpqNode = tmpqNode->list_next;
1110 1.3 oster }
1111 1.3 oster }
1112 1.20 oster #endif
1113 1.3 oster /* connect commit node to write nodes */
1114 1.3 oster RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1115 1.22 oster tmpwriteDataNode = writeDataNodes;
1116 1.3 oster for (i = 0; i < numDataNodes; i++) {
1117 1.28 oster RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1118 1.22 oster commitNode->succedents[i] = tmpwriteDataNode;
1119 1.22 oster tmpwriteDataNode->antecedents[0] = commitNode;
1120 1.22 oster tmpwriteDataNode->antType[0] = rf_trueData;
1121 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
1122 1.3 oster }
1123 1.22 oster tmpwriteParityNode = writeParityNodes;
1124 1.3 oster for (i = 0; i < numParityNodes; i++) {
1125 1.22 oster RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1126 1.22 oster commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1127 1.22 oster tmpwriteParityNode->antecedents[0] = commitNode;
1128 1.22 oster tmpwriteParityNode->antType[0] = rf_trueData;
1129 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
1130 1.3 oster }
1131 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1132 1.3 oster if (nfaults == 2) {
1133 1.22 oster tmpwriteQNode = writeQNodes;
1134 1.3 oster for (i = 0; i < numParityNodes; i++) {
1135 1.22 oster RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1136 1.22 oster commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1137 1.22 oster tmpwriteQNode->antecedents[0] = commitNode;
1138 1.22 oster tmpwriteQNode->antType[0] = rf_trueData;
1139 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
1140 1.3 oster }
1141 1.3 oster }
1142 1.20 oster #endif
1143 1.3 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1144 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1145 1.22 oster tmpwriteDataNode = writeDataNodes;
1146 1.3 oster for (i = 0; i < numDataNodes; i++) {
1147 1.16 oster /* connect write new data nodes to term node */
1148 1.22 oster RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1149 1.16 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1150 1.22 oster tmpwriteDataNode->succedents[0] = termNode;
1151 1.22 oster termNode->antecedents[i] = tmpwriteDataNode;
1152 1.16 oster termNode->antType[i] = rf_control;
1153 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
1154 1.3 oster }
1155 1.3 oster
1156 1.22 oster tmpwriteParityNode = writeParityNodes;
1157 1.3 oster for (i = 0; i < numParityNodes; i++) {
1158 1.22 oster RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1159 1.22 oster tmpwriteParityNode->succedents[0] = termNode;
1160 1.22 oster termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1161 1.16 oster termNode->antType[numDataNodes + i] = rf_control;
1162 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
1163 1.3 oster }
1164 1.3 oster
1165 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1166 1.3 oster if (nfaults == 2) {
1167 1.22 oster tmpwriteQNode = writeQNodes;
1168 1.3 oster for (i = 0; i < numParityNodes; i++) {
1169 1.22 oster RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1170 1.22 oster tmpwriteQNode->succedents[0] = termNode;
1171 1.22 oster termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1172 1.16 oster termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1173 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
1174 1.3 oster }
1175 1.3 oster }
1176 1.20 oster #endif
1177 1.1 oster }
1178 1.1 oster
1179 1.1 oster
1180 1.1 oster /******************************************************************************
1181 1.1 oster * create a write graph (fault-free or degraded) for RAID level 1
1182 1.1 oster *
1183 1.1 oster * Hdr -> Commit -> Wpd -> Nil -> Trm
1184 1.1 oster * -> Wsd ->
1185 1.1 oster *
1186 1.1 oster * The "Wpd" node writes data to the primary copy in the mirror pair
1187 1.1 oster * The "Wsd" node writes data to the secondary copy in the mirror pair
1188 1.1 oster *
1189 1.1 oster * Parameters: raidPtr - description of the physical array
1190 1.1 oster * asmap - logical & physical addresses for this access
1191 1.1 oster * bp - buffer ptr (holds write data)
1192 1.3 oster * flags - general flags (e.g. disk locking)
1193 1.1 oster * allocList - list of memory allocated in DAG creation
1194 1.1 oster *****************************************************************************/
1195 1.1 oster
1196 1.29 perry void
1197 1.13 oster rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1198 1.33 christos RF_DagHeader_t *dag_h, void *bp,
1199 1.33 christos RF_RaidAccessFlags_t flags,
1200 1.13 oster RF_AllocListElem_t *allocList)
1201 1.1 oster {
1202 1.3 oster RF_DagNode_t *unblockNode, *termNode, *commitNode;
1203 1.22 oster RF_DagNode_t *wndNode, *wmirNode;
1204 1.22 oster RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1205 1.3 oster int nWndNodes, nWmirNodes, i;
1206 1.3 oster RF_ReconUnitNum_t which_ru;
1207 1.3 oster RF_PhysDiskAddr_t *pda, *pdaP;
1208 1.3 oster RF_StripeNum_t parityStripeID;
1209 1.3 oster
1210 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1211 1.3 oster asmap->raidAddress, &which_ru);
1212 1.19 oster #if RF_DEBUG_DAG
1213 1.3 oster if (rf_dagDebug) {
1214 1.3 oster printf("[Creating RAID level 1 write DAG]\n");
1215 1.3 oster }
1216 1.19 oster #endif
1217 1.3 oster dag_h->creator = "RaidOneWriteDAG";
1218 1.3 oster
1219 1.3 oster /* 2 implies access not SU aligned */
1220 1.3 oster nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1221 1.3 oster nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1222 1.3 oster
1223 1.3 oster /* alloc the Wnd nodes and the Wmir node */
1224 1.3 oster if (asmap->numDataFailed == 1)
1225 1.3 oster nWndNodes--;
1226 1.3 oster if (asmap->numParityFailed == 1)
1227 1.3 oster nWmirNodes--;
1228 1.3 oster
1229 1.3 oster /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1230 1.3 oster * + terminator) */
1231 1.22 oster for (i = 0; i < nWndNodes; i++) {
1232 1.22 oster tmpNode = rf_AllocDAGNode();
1233 1.22 oster tmpNode->list_next = dag_h->nodes;
1234 1.22 oster dag_h->nodes = tmpNode;
1235 1.22 oster }
1236 1.22 oster wndNode = dag_h->nodes;
1237 1.22 oster
1238 1.22 oster for (i = 0; i < nWmirNodes; i++) {
1239 1.22 oster tmpNode = rf_AllocDAGNode();
1240 1.22 oster tmpNode->list_next = dag_h->nodes;
1241 1.22 oster dag_h->nodes = tmpNode;
1242 1.22 oster }
1243 1.22 oster wmirNode = dag_h->nodes;
1244 1.22 oster
1245 1.22 oster commitNode = rf_AllocDAGNode();
1246 1.22 oster commitNode->list_next = dag_h->nodes;
1247 1.22 oster dag_h->nodes = commitNode;
1248 1.22 oster
1249 1.22 oster unblockNode = rf_AllocDAGNode();
1250 1.22 oster unblockNode->list_next = dag_h->nodes;
1251 1.22 oster dag_h->nodes = unblockNode;
1252 1.22 oster
1253 1.22 oster termNode = rf_AllocDAGNode();
1254 1.22 oster termNode->list_next = dag_h->nodes;
1255 1.22 oster dag_h->nodes = termNode;
1256 1.3 oster
1257 1.3 oster /* this dag can commit immediately */
1258 1.3 oster dag_h->numCommitNodes = 1;
1259 1.3 oster dag_h->numCommits = 0;
1260 1.3 oster dag_h->numSuccedents = 1;
1261 1.3 oster
1262 1.3 oster /* initialize the commit, unblock, and term nodes */
1263 1.29 perry rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1264 1.29 perry rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1265 1.14 oster 0, 0, 0, dag_h, "Cmt", allocList);
1266 1.29 perry rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1267 1.29 perry rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1268 1.14 oster 0, 0, dag_h, "Nil", allocList);
1269 1.29 perry rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1270 1.29 perry rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1271 1.14 oster dag_h, "Trm", allocList);
1272 1.3 oster
1273 1.3 oster /* initialize the wnd nodes */
1274 1.3 oster if (nWndNodes > 0) {
1275 1.3 oster pda = asmap->physInfo;
1276 1.22 oster tmpwndNode = wndNode;
1277 1.3 oster for (i = 0; i < nWndNodes; i++) {
1278 1.29 perry rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1279 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1280 1.29 perry rf_GenericWakeupFunc, 1, 1, 4, 0,
1281 1.14 oster dag_h, "Wpd", allocList);
1282 1.3 oster RF_ASSERT(pda != NULL);
1283 1.22 oster tmpwndNode->params[0].p = pda;
1284 1.22 oster tmpwndNode->params[1].p = pda->bufPtr;
1285 1.22 oster tmpwndNode->params[2].v = parityStripeID;
1286 1.22 oster tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1287 1.3 oster pda = pda->next;
1288 1.22 oster tmpwndNode = tmpwndNode->list_next;
1289 1.3 oster }
1290 1.3 oster RF_ASSERT(pda == NULL);
1291 1.3 oster }
1292 1.3 oster /* initialize the mirror nodes */
1293 1.3 oster if (nWmirNodes > 0) {
1294 1.3 oster pda = asmap->physInfo;
1295 1.3 oster pdaP = asmap->parityInfo;
1296 1.22 oster tmpwmirNode = wmirNode;
1297 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1298 1.29 perry rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1299 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1300 1.29 perry rf_GenericWakeupFunc, 1, 1, 4, 0,
1301 1.14 oster dag_h, "Wsd", allocList);
1302 1.3 oster RF_ASSERT(pda != NULL);
1303 1.22 oster tmpwmirNode->params[0].p = pdaP;
1304 1.22 oster tmpwmirNode->params[1].p = pda->bufPtr;
1305 1.22 oster tmpwmirNode->params[2].v = parityStripeID;
1306 1.22 oster tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1307 1.3 oster pda = pda->next;
1308 1.3 oster pdaP = pdaP->next;
1309 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1310 1.3 oster }
1311 1.3 oster RF_ASSERT(pda == NULL);
1312 1.3 oster RF_ASSERT(pdaP == NULL);
1313 1.3 oster }
1314 1.3 oster /* link the header node to the commit node */
1315 1.3 oster RF_ASSERT(dag_h->numSuccedents == 1);
1316 1.3 oster RF_ASSERT(commitNode->numAntecedents == 0);
1317 1.3 oster dag_h->succedents[0] = commitNode;
1318 1.3 oster
1319 1.3 oster /* link the commit node to the write nodes */
1320 1.3 oster RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1321 1.22 oster tmpwndNode = wndNode;
1322 1.3 oster for (i = 0; i < nWndNodes; i++) {
1323 1.22 oster RF_ASSERT(tmpwndNode->numAntecedents == 1);
1324 1.22 oster commitNode->succedents[i] = tmpwndNode;
1325 1.22 oster tmpwndNode->antecedents[0] = commitNode;
1326 1.22 oster tmpwndNode->antType[0] = rf_control;
1327 1.22 oster tmpwndNode = tmpwndNode->list_next;
1328 1.3 oster }
1329 1.22 oster tmpwmirNode = wmirNode;
1330 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1331 1.22 oster RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1332 1.22 oster commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1333 1.22 oster tmpwmirNode->antecedents[0] = commitNode;
1334 1.22 oster tmpwmirNode->antType[0] = rf_control;
1335 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1336 1.3 oster }
1337 1.3 oster
1338 1.3 oster /* link the write nodes to the unblock node */
1339 1.3 oster RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1340 1.22 oster tmpwndNode = wndNode;
1341 1.3 oster for (i = 0; i < nWndNodes; i++) {
1342 1.22 oster RF_ASSERT(tmpwndNode->numSuccedents == 1);
1343 1.22 oster tmpwndNode->succedents[0] = unblockNode;
1344 1.22 oster unblockNode->antecedents[i] = tmpwndNode;
1345 1.3 oster unblockNode->antType[i] = rf_control;
1346 1.22 oster tmpwndNode = tmpwndNode->list_next;
1347 1.3 oster }
1348 1.22 oster tmpwmirNode = wmirNode;
1349 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1350 1.22 oster RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1351 1.22 oster tmpwmirNode->succedents[0] = unblockNode;
1352 1.22 oster unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1353 1.3 oster unblockNode->antType[i + nWndNodes] = rf_control;
1354 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1355 1.3 oster }
1356 1.3 oster
1357 1.3 oster /* link the unblock node to the term node */
1358 1.3 oster RF_ASSERT(unblockNode->numSuccedents == 1);
1359 1.3 oster RF_ASSERT(termNode->numAntecedents == 1);
1360 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1361 1.3 oster unblockNode->succedents[0] = termNode;
1362 1.3 oster termNode->antecedents[0] = unblockNode;
1363 1.3 oster termNode->antType[0] = rf_control;
1364 1.1 oster }
1365