rf_dagffwr.c revision 1.34.30.2 1 1.34.30.2 martin /* $NetBSD: rf_dagffwr.c,v 1.34.30.2 2020/04/13 08:04:47 martin Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*
30 1.1 oster * rf_dagffwr.c
31 1.1 oster *
32 1.1 oster * code for creating fault-free DAGs
33 1.1 oster *
34 1.1 oster */
35 1.7 lukem
36 1.7 lukem #include <sys/cdefs.h>
37 1.34.30.2 martin __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.34.30.2 2020/04/13 08:04:47 martin Exp $");
38 1.1 oster
39 1.6 oster #include <dev/raidframe/raidframevar.h>
40 1.6 oster
41 1.1 oster #include "rf_raid.h"
42 1.1 oster #include "rf_dag.h"
43 1.1 oster #include "rf_dagutils.h"
44 1.1 oster #include "rf_dagfuncs.h"
45 1.1 oster #include "rf_debugMem.h"
46 1.1 oster #include "rf_dagffrd.h"
47 1.1 oster #include "rf_general.h"
48 1.1 oster #include "rf_dagffwr.h"
49 1.23 oster #include "rf_map.h"
50 1.1 oster
51 1.1 oster /******************************************************************************
52 1.1 oster *
53 1.1 oster * General comments on DAG creation:
54 1.3 oster *
55 1.1 oster * All DAGs in this file use roll-away error recovery. Each DAG has a single
56 1.1 oster * commit node, usually called "Cmt." If an error occurs before the Cmt node
57 1.1 oster * is reached, the execution engine will halt forward execution and work
58 1.1 oster * backward through the graph, executing the undo functions. Assuming that
59 1.1 oster * each node in the graph prior to the Cmt node is undoable and atomic - or -
60 1.1 oster * does not make changes to permanent state, the graph will fail atomically.
61 1.1 oster * If an error occurs after the Cmt node executes, the engine will roll-forward
62 1.1 oster * through the graph, blindly executing nodes until it reaches the end.
63 1.1 oster * If a graph reaches the end, it is assumed to have completed successfully.
64 1.1 oster *
65 1.1 oster * A graph has only 1 Cmt node.
66 1.1 oster *
67 1.1 oster */
68 1.1 oster
69 1.1 oster
70 1.1 oster /******************************************************************************
71 1.1 oster *
72 1.1 oster * The following wrappers map the standard DAG creation interface to the
73 1.1 oster * DAG creation routines. Additionally, these wrappers enable experimentation
74 1.1 oster * with new DAG structures by providing an extra level of indirection, allowing
75 1.1 oster * the DAG creation routines to be replaced at this single point.
76 1.1 oster */
77 1.1 oster
78 1.1 oster
79 1.29 perry void
80 1.13 oster rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81 1.13 oster RF_DagHeader_t *dag_h, void *bp,
82 1.13 oster RF_RaidAccessFlags_t flags,
83 1.13 oster RF_AllocListElem_t *allocList,
84 1.33 christos RF_IoType_t type)
85 1.1 oster {
86 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87 1.14 oster RF_IO_TYPE_WRITE);
88 1.1 oster }
89 1.1 oster
90 1.29 perry void
91 1.13 oster rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92 1.13 oster RF_DagHeader_t *dag_h, void *bp,
93 1.13 oster RF_RaidAccessFlags_t flags,
94 1.13 oster RF_AllocListElem_t *allocList,
95 1.33 christos RF_IoType_t type)
96 1.1 oster {
97 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98 1.14 oster RF_IO_TYPE_WRITE);
99 1.1 oster }
100 1.1 oster
101 1.29 perry void
102 1.13 oster rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103 1.13 oster RF_DagHeader_t *dag_h, void *bp,
104 1.13 oster RF_RaidAccessFlags_t flags,
105 1.13 oster RF_AllocListElem_t *allocList)
106 1.1 oster {
107 1.3 oster /* "normal" rollaway */
108 1.29 perry rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109 1.14 oster allocList, &rf_xorFuncs, NULL);
110 1.1 oster }
111 1.1 oster
112 1.29 perry void
113 1.13 oster rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114 1.13 oster RF_DagHeader_t *dag_h, void *bp,
115 1.13 oster RF_RaidAccessFlags_t flags,
116 1.13 oster RF_AllocListElem_t *allocList)
117 1.1 oster {
118 1.3 oster /* "normal" rollaway */
119 1.29 perry rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120 1.14 oster allocList, 1, rf_RegularXorFunc, RF_TRUE);
121 1.1 oster }
122 1.1 oster
123 1.1 oster
124 1.1 oster /******************************************************************************
125 1.1 oster *
126 1.1 oster * DAG creation code begins here
127 1.1 oster */
/*
 * BUF_ALLOC(num): allocate a buffer through RF_MallocAndAdd(), sized by
 * rf_RaidAddressToByte(raidPtr, num) and registered on allocList.
 *
 * The macro is unhygienic on purpose: it picks up `raidPtr' and `allocList'
 * from the scope in which it is expanded, so both must be visible there.
 */
#define	BUF_ALLOC(num)							\
	RF_MallocAndAdd(rf_RaidAddressToByte(raidPtr, num), allocList)
130 1.1 oster
131 1.1 oster
132 1.1 oster /******************************************************************************
133 1.1 oster *
134 1.1 oster * creates a DAG to perform a large-write operation:
135 1.1 oster *
136 1.1 oster * / Rod \ / Wnd \
137 1.1 oster * H -- block- Rod - Xor - Cmt - Wnd --- T
138 1.1 oster * \ Rod / \ Wnp /
139 1.1 oster * \[Wnq]/
140 1.1 oster *
141 1.1 oster * The XOR node also does the Q calculation in the P+Q architecture.
142 1.1 oster * All nodes are before the commit node (Cmt) are assumed to be atomic and
143 1.1 oster * undoable - or - they make no changes to permanent state.
144 1.1 oster *
145 1.1 oster * Rod = read old data
146 1.1 oster * Cmt = commit node
147 1.1 oster * Wnp = write new parity
148 1.1 oster * Wnd = write new data
149 1.1 oster * Wnq = write new "q"
150 1.1 oster * [] denotes optional segments in the graph
151 1.1 oster *
152 1.1 oster * Parameters: raidPtr - description of the physical array
153 1.1 oster * asmap - logical & physical addresses for this access
154 1.1 oster * bp - buffer ptr (holds write data)
155 1.3 oster * flags - general flags (e.g. disk locking)
156 1.1 oster * allocList - list of memory allocated in DAG creation
157 1.1 oster * nfaults - number of faults array can tolerate
158 1.1 oster * (equal to # redundancy units in stripe)
159 1.1 oster * redfuncs - list of redundancy generating functions
160 1.1 oster *
161 1.1 oster *****************************************************************************/
162 1.1 oster
163 1.29 perry void
164 1.13 oster rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
165 1.33 christos RF_DagHeader_t *dag_h, void *bp,
166 1.33 christos RF_RaidAccessFlags_t flags,
167 1.13 oster RF_AllocListElem_t *allocList,
168 1.34.30.2 martin int nfaults, void (*redFunc) (RF_DagNode_t *),
169 1.13 oster int allowBufferRecycle)
170 1.1 oster {
171 1.22 oster RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
172 1.34 martin RF_DagNode_t *blockNode, *commitNode, *termNode;
173 1.34 martin #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
174 1.34 martin RF_DagNode_t *wnqNode;
175 1.34 martin #endif
176 1.3 oster int nWndNodes, nRodNodes, i, nodeNum, asmNum;
177 1.3 oster RF_AccessStripeMapHeader_t *new_asm_h[2];
178 1.3 oster RF_StripeNum_t parityStripeID;
179 1.3 oster char *sosBuffer, *eosBuffer;
180 1.3 oster RF_ReconUnitNum_t which_ru;
181 1.3 oster RF_RaidLayout_t *layoutPtr;
182 1.3 oster RF_PhysDiskAddr_t *pda;
183 1.3 oster
184 1.3 oster layoutPtr = &(raidPtr->Layout);
185 1.29 perry parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
186 1.14 oster asmap->raidAddress,
187 1.14 oster &which_ru);
188 1.3 oster
189 1.19 oster #if RF_DEBUG_DAG
190 1.3 oster if (rf_dagDebug) {
191 1.3 oster printf("[Creating large-write DAG]\n");
192 1.3 oster }
193 1.19 oster #endif
194 1.3 oster dag_h->creator = "LargeWriteDAG";
195 1.3 oster
196 1.3 oster dag_h->numCommitNodes = 1;
197 1.3 oster dag_h->numCommits = 0;
198 1.3 oster dag_h->numSuccedents = 1;
199 1.3 oster
200 1.3 oster /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */
201 1.3 oster nWndNodes = asmap->numStripeUnitsAccessed;
202 1.22 oster
203 1.22 oster for (i = 0; i < nWndNodes; i++) {
204 1.22 oster tmpNode = rf_AllocDAGNode();
205 1.22 oster tmpNode->list_next = dag_h->nodes;
206 1.22 oster dag_h->nodes = tmpNode;
207 1.22 oster }
208 1.22 oster wndNodes = dag_h->nodes;
209 1.22 oster
210 1.22 oster xorNode = rf_AllocDAGNode();
211 1.22 oster xorNode->list_next = dag_h->nodes;
212 1.22 oster dag_h->nodes = xorNode;
213 1.22 oster
214 1.22 oster wnpNode = rf_AllocDAGNode();
215 1.22 oster wnpNode->list_next = dag_h->nodes;
216 1.22 oster dag_h->nodes = wnpNode;
217 1.22 oster
218 1.22 oster blockNode = rf_AllocDAGNode();
219 1.22 oster blockNode->list_next = dag_h->nodes;
220 1.22 oster dag_h->nodes = blockNode;
221 1.22 oster
222 1.22 oster commitNode = rf_AllocDAGNode();
223 1.22 oster commitNode->list_next = dag_h->nodes;
224 1.22 oster dag_h->nodes = commitNode;
225 1.22 oster
226 1.22 oster termNode = rf_AllocDAGNode();
227 1.22 oster termNode->list_next = dag_h->nodes;
228 1.22 oster dag_h->nodes = termNode;
229 1.22 oster
230 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
231 1.3 oster if (nfaults == 2) {
232 1.22 oster wnqNode = rf_AllocDAGNode();
233 1.3 oster } else {
234 1.3 oster wnqNode = NULL;
235 1.3 oster }
236 1.20 oster #endif
237 1.29 perry rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
238 1.29 perry new_asm_h, &nRodNodes, &sosBuffer,
239 1.14 oster &eosBuffer, allocList);
240 1.3 oster if (nRodNodes > 0) {
241 1.22 oster for (i = 0; i < nRodNodes; i++) {
242 1.22 oster tmpNode = rf_AllocDAGNode();
243 1.22 oster tmpNode->list_next = dag_h->nodes;
244 1.22 oster dag_h->nodes = tmpNode;
245 1.22 oster }
246 1.22 oster rodNodes = dag_h->nodes;
247 1.3 oster } else {
248 1.3 oster rodNodes = NULL;
249 1.3 oster }
250 1.3 oster
251 1.3 oster /* begin node initialization */
252 1.3 oster if (nRodNodes > 0) {
253 1.29 perry rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
254 1.29 perry rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
255 1.14 oster dag_h, "Nil", allocList);
256 1.3 oster } else {
257 1.29 perry rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
258 1.29 perry rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
259 1.14 oster dag_h, "Nil", allocList);
260 1.3 oster }
261 1.3 oster
262 1.29 perry rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
263 1.29 perry rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
264 1.14 oster dag_h, "Cmt", allocList);
265 1.29 perry rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
266 1.29 perry rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
267 1.14 oster dag_h, "Trm", allocList);
268 1.3 oster
269 1.3 oster /* initialize the Rod nodes */
270 1.22 oster tmpNode = rodNodes;
271 1.3 oster for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
272 1.3 oster if (new_asm_h[asmNum]) {
273 1.3 oster pda = new_asm_h[asmNum]->stripeMap->physInfo;
274 1.3 oster while (pda) {
275 1.29 perry rf_InitNode(tmpNode, rf_wait,
276 1.14 oster RF_FALSE, rf_DiskReadFunc,
277 1.29 perry rf_DiskReadUndoFunc,
278 1.29 perry rf_GenericWakeupFunc,
279 1.14 oster 1, 1, 4, 0, dag_h,
280 1.14 oster "Rod", allocList);
281 1.22 oster tmpNode->params[0].p = pda;
282 1.22 oster tmpNode->params[1].p = pda->bufPtr;
283 1.22 oster tmpNode->params[2].v = parityStripeID;
284 1.22 oster tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
285 1.17 oster which_ru);
286 1.3 oster nodeNum++;
287 1.3 oster pda = pda->next;
288 1.22 oster tmpNode = tmpNode->list_next;
289 1.3 oster }
290 1.3 oster }
291 1.3 oster }
292 1.3 oster RF_ASSERT(nodeNum == nRodNodes);
293 1.3 oster
294 1.3 oster /* initialize the wnd nodes */
295 1.3 oster pda = asmap->physInfo;
296 1.22 oster tmpNode = wndNodes;
297 1.3 oster for (i = 0; i < nWndNodes; i++) {
298 1.29 perry rf_InitNode(tmpNode, rf_wait, RF_FALSE,
299 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
300 1.29 perry rf_GenericWakeupFunc, 1, 1, 4, 0,
301 1.14 oster dag_h, "Wnd", allocList);
302 1.3 oster RF_ASSERT(pda != NULL);
303 1.22 oster tmpNode->params[0].p = pda;
304 1.22 oster tmpNode->params[1].p = pda->bufPtr;
305 1.22 oster tmpNode->params[2].v = parityStripeID;
306 1.22 oster tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
307 1.3 oster pda = pda->next;
308 1.22 oster tmpNode = tmpNode->list_next;
309 1.3 oster }
310 1.3 oster
311 1.3 oster /* initialize the redundancy node */
312 1.3 oster if (nRodNodes > 0) {
313 1.29 perry rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
314 1.14 oster rf_NullNodeUndoFunc, NULL, 1,
315 1.29 perry nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
316 1.14 oster nfaults, dag_h, "Xr ", allocList);
317 1.3 oster } else {
318 1.29 perry rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
319 1.14 oster rf_NullNodeUndoFunc, NULL, 1,
320 1.29 perry 1, 2 * (nWndNodes + nRodNodes) + 1,
321 1.14 oster nfaults, dag_h, "Xr ", allocList);
322 1.3 oster }
323 1.3 oster xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
324 1.22 oster tmpNode = wndNodes;
325 1.3 oster for (i = 0; i < nWndNodes; i++) {
326 1.14 oster /* pda */
327 1.22 oster xorNode->params[2 * i + 0] = tmpNode->params[0];
328 1.29 perry /* buf ptr */
329 1.22 oster xorNode->params[2 * i + 1] = tmpNode->params[1];
330 1.22 oster tmpNode = tmpNode->list_next;
331 1.3 oster }
332 1.22 oster tmpNode = rodNodes;
333 1.3 oster for (i = 0; i < nRodNodes; i++) {
334 1.14 oster /* pda */
335 1.22 oster xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
336 1.14 oster /* buf ptr */
337 1.22 oster xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
338 1.22 oster tmpNode = tmpNode->list_next;
339 1.3 oster }
340 1.3 oster /* xor node needs to get at RAID information */
341 1.3 oster xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
342 1.3 oster
343 1.3 oster /*
344 1.14 oster * Look for an Rod node that reads a complete SU. If none,
345 1.14 oster * alloc a buffer to receive the parity info. Note that we
346 1.14 oster * can't use a new data buffer because it will not have gotten
347 1.14 oster * written when the xor occurs. */
348 1.3 oster if (allowBufferRecycle) {
349 1.22 oster tmpNode = rodNodes;
350 1.3 oster for (i = 0; i < nRodNodes; i++) {
351 1.22 oster if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
352 1.3 oster break;
353 1.22 oster tmpNode = tmpNode->list_next;
354 1.3 oster }
355 1.3 oster }
356 1.3 oster if ((!allowBufferRecycle) || (i == nRodNodes)) {
357 1.27 oster xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
358 1.3 oster } else {
359 1.22 oster /* this works because the only way we get here is if
360 1.22 oster allowBufferRecycle is true and we went through the
361 1.22 oster above for loop, and exited via the break before
362 1.22 oster i==nRodNodes was true. That means tmpNode will
363 1.22 oster still point to a valid node -- the one we want for
364 1.22 oster here! */
365 1.22 oster xorNode->results[0] = tmpNode->params[1].p;
366 1.3 oster }
367 1.3 oster
368 1.3 oster /* initialize the Wnp node */
369 1.29 perry rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
370 1.29 perry rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
371 1.14 oster dag_h, "Wnp", allocList);
372 1.3 oster wnpNode->params[0].p = asmap->parityInfo;
373 1.3 oster wnpNode->params[1].p = xorNode->results[0];
374 1.3 oster wnpNode->params[2].v = parityStripeID;
375 1.17 oster wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
376 1.3 oster /* parityInfo must describe entire parity unit */
377 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
378 1.3 oster
379 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
380 1.3 oster if (nfaults == 2) {
381 1.3 oster /*
382 1.3 oster * We never try to recycle a buffer for the Q calcuation
383 1.3 oster * in addition to the parity. This would cause two buffers
384 1.3 oster * to get smashed during the P and Q calculation, guaranteeing
385 1.3 oster * one would be wrong.
386 1.3 oster */
387 1.34.30.1 christos xorNode->results[1] =
388 1.34.30.1 christos BUF_ALLOC(raidPtr->Layout.sectorsPerStripeUnit);
389 1.29 perry rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
390 1.29 perry rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
391 1.14 oster 1, 1, 4, 0, dag_h, "Wnq", allocList);
392 1.3 oster wnqNode->params[0].p = asmap->qInfo;
393 1.3 oster wnqNode->params[1].p = xorNode->results[1];
394 1.3 oster wnqNode->params[2].v = parityStripeID;
395 1.17 oster wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
396 1.3 oster /* parityInfo must describe entire parity unit */
397 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
398 1.3 oster }
399 1.20 oster #endif
400 1.3 oster /*
401 1.3 oster * Connect nodes to form graph.
402 1.3 oster */
403 1.3 oster
404 1.3 oster /* connect dag header to block node */
405 1.3 oster RF_ASSERT(blockNode->numAntecedents == 0);
406 1.3 oster dag_h->succedents[0] = blockNode;
407 1.3 oster
408 1.3 oster if (nRodNodes > 0) {
409 1.3 oster /* connect the block node to the Rod nodes */
410 1.3 oster RF_ASSERT(blockNode->numSuccedents == nRodNodes);
411 1.3 oster RF_ASSERT(xorNode->numAntecedents == nRodNodes);
412 1.22 oster tmpNode = rodNodes;
413 1.3 oster for (i = 0; i < nRodNodes; i++) {
414 1.28 oster RF_ASSERT(tmpNode->numAntecedents == 1);
415 1.22 oster blockNode->succedents[i] = tmpNode;
416 1.22 oster tmpNode->antecedents[0] = blockNode;
417 1.22 oster tmpNode->antType[0] = rf_control;
418 1.3 oster
419 1.3 oster /* connect the Rod nodes to the Xor node */
420 1.28 oster RF_ASSERT(tmpNode->numSuccedents == 1);
421 1.22 oster tmpNode->succedents[0] = xorNode;
422 1.22 oster xorNode->antecedents[i] = tmpNode;
423 1.3 oster xorNode->antType[i] = rf_trueData;
424 1.22 oster tmpNode = tmpNode->list_next;
425 1.3 oster }
426 1.3 oster } else {
427 1.3 oster /* connect the block node to the Xor node */
428 1.3 oster RF_ASSERT(blockNode->numSuccedents == 1);
429 1.3 oster RF_ASSERT(xorNode->numAntecedents == 1);
430 1.3 oster blockNode->succedents[0] = xorNode;
431 1.3 oster xorNode->antecedents[0] = blockNode;
432 1.3 oster xorNode->antType[0] = rf_control;
433 1.3 oster }
434 1.3 oster
435 1.3 oster /* connect the xor node to the commit node */
436 1.3 oster RF_ASSERT(xorNode->numSuccedents == 1);
437 1.3 oster RF_ASSERT(commitNode->numAntecedents == 1);
438 1.3 oster xorNode->succedents[0] = commitNode;
439 1.3 oster commitNode->antecedents[0] = xorNode;
440 1.3 oster commitNode->antType[0] = rf_control;
441 1.3 oster
442 1.3 oster /* connect the commit node to the write nodes */
443 1.3 oster RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
444 1.22 oster tmpNode = wndNodes;
445 1.3 oster for (i = 0; i < nWndNodes; i++) {
446 1.3 oster RF_ASSERT(wndNodes->numAntecedents == 1);
447 1.22 oster commitNode->succedents[i] = tmpNode;
448 1.22 oster tmpNode->antecedents[0] = commitNode;
449 1.22 oster tmpNode->antType[0] = rf_control;
450 1.22 oster tmpNode = tmpNode->list_next;
451 1.3 oster }
452 1.3 oster RF_ASSERT(wnpNode->numAntecedents == 1);
453 1.3 oster commitNode->succedents[nWndNodes] = wnpNode;
454 1.3 oster wnpNode->antecedents[0] = commitNode;
455 1.3 oster wnpNode->antType[0] = rf_trueData;
456 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
457 1.3 oster if (nfaults == 2) {
458 1.3 oster RF_ASSERT(wnqNode->numAntecedents == 1);
459 1.3 oster commitNode->succedents[nWndNodes + 1] = wnqNode;
460 1.3 oster wnqNode->antecedents[0] = commitNode;
461 1.3 oster wnqNode->antType[0] = rf_trueData;
462 1.3 oster }
463 1.20 oster #endif
464 1.3 oster /* connect the write nodes to the term node */
465 1.3 oster RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
466 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
467 1.22 oster tmpNode = wndNodes;
468 1.3 oster for (i = 0; i < nWndNodes; i++) {
469 1.3 oster RF_ASSERT(wndNodes->numSuccedents == 1);
470 1.22 oster tmpNode->succedents[0] = termNode;
471 1.22 oster termNode->antecedents[i] = tmpNode;
472 1.3 oster termNode->antType[i] = rf_control;
473 1.22 oster tmpNode = tmpNode->list_next;
474 1.3 oster }
475 1.3 oster RF_ASSERT(wnpNode->numSuccedents == 1);
476 1.3 oster wnpNode->succedents[0] = termNode;
477 1.3 oster termNode->antecedents[nWndNodes] = wnpNode;
478 1.3 oster termNode->antType[nWndNodes] = rf_control;
479 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
480 1.3 oster if (nfaults == 2) {
481 1.3 oster RF_ASSERT(wnqNode->numSuccedents == 1);
482 1.3 oster wnqNode->succedents[0] = termNode;
483 1.3 oster termNode->antecedents[nWndNodes + 1] = wnqNode;
484 1.3 oster termNode->antType[nWndNodes + 1] = rf_control;
485 1.3 oster }
486 1.20 oster #endif
487 1.1 oster }
488 1.1 oster /******************************************************************************
489 1.1 oster *
490 1.1 oster * creates a DAG to perform a small-write operation (either raid 5 or pq),
491 1.1 oster * which is as follows:
492 1.1 oster *
493 1.1 oster * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
494 1.1 oster *            \- Rod X      /     \----> Wnd [Und]-/
495 1.1 oster *           [\- Rod X     /       \---> Wnd [Und]-/]
496 1.1 oster *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
497 1.1 oster *
498 1.1 oster * Rop = read old parity
499 1.1 oster * Rod = read old data
500 1.1 oster * Roq = read old "q"
501 1.1 oster * Cmt = commit node
502 1.1 oster * Und = unlock data disk
503 1.1 oster * Unp = unlock parity disk
504 1.1 oster * Unq = unlock q disk
505 1.1 oster * Wnp = write new parity
506 1.1 oster * Wnd = write new data
507 1.1 oster * Wnq = write new "q"
508 1.1 oster * [ ] denotes optional segments in the graph
509 1.1 oster *
510 1.1 oster * Parameters: raidPtr - description of the physical array
511 1.1 oster * asmap - logical & physical addresses for this access
512 1.1 oster * bp - buffer ptr (holds write data)
513 1.3 oster * flags - general flags (e.g. disk locking)
514 1.1 oster * allocList - list of memory allocated in DAG creation
515 1.1 oster * pfuncs - list of parity generating functions
516 1.1 oster * qfuncs - list of q generating functions
517 1.1 oster *
518 1.1 oster * A null qfuncs indicates single fault tolerant
519 1.1 oster *****************************************************************************/
520 1.1 oster
521 1.29 perry void
522 1.13 oster rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
523 1.33 christos RF_DagHeader_t *dag_h, void *bp,
524 1.33 christos RF_RaidAccessFlags_t flags,
525 1.13 oster RF_AllocListElem_t *allocList,
526 1.13 oster const RF_RedFuncs_t *pfuncs,
527 1.13 oster const RF_RedFuncs_t *qfuncs)
528 1.1 oster {
529 1.34 martin RF_DagNode_t *readDataNodes, *readParityNodes, *termNode;
530 1.22 oster RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
531 1.34 martin RF_DagNode_t *xorNodes, *blockNode, *commitNode;
532 1.34 martin RF_DagNode_t *writeDataNodes, *writeParityNodes;
533 1.34 martin RF_DagNode_t *tmpxorNode, *tmpwriteDataNode;
534 1.22 oster RF_DagNode_t *tmpwriteParityNode;
535 1.22 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
536 1.34 martin RF_DagNode_t *tmpwriteQNode, *tmpreadQNode, *tmpqNode, *readQNodes,
537 1.34 martin *writeQNodes, *qNodes;
538 1.22 oster #endif
539 1.34 martin int i, j, nNodes;
540 1.3 oster RF_ReconUnitNum_t which_ru;
541 1.34.30.2 martin void (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
542 1.34.30.2 martin void (*qfunc) (RF_DagNode_t *) __unused;
543 1.3 oster int numDataNodes, numParityNodes;
544 1.3 oster RF_StripeNum_t parityStripeID;
545 1.3 oster RF_PhysDiskAddr_t *pda;
546 1.34 martin const char *name, *qname __unused;
547 1.3 oster long nfaults;
548 1.3 oster
549 1.3 oster nfaults = qfuncs ? 2 : 1;
550 1.3 oster
551 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
552 1.3 oster asmap->raidAddress, &which_ru);
553 1.3 oster pda = asmap->physInfo;
554 1.3 oster numDataNodes = asmap->numStripeUnitsAccessed;
555 1.3 oster numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
556 1.3 oster
557 1.19 oster #if RF_DEBUG_DAG
558 1.3 oster if (rf_dagDebug) {
559 1.3 oster printf("[Creating small-write DAG]\n");
560 1.3 oster }
561 1.19 oster #endif
562 1.3 oster RF_ASSERT(numDataNodes > 0);
563 1.3 oster dag_h->creator = "SmallWriteDAG";
564 1.3 oster
565 1.3 oster dag_h->numCommitNodes = 1;
566 1.3 oster dag_h->numCommits = 0;
567 1.3 oster dag_h->numSuccedents = 1;
568 1.3 oster
569 1.3 oster /*
570 1.3 oster * DAG creation occurs in four steps:
571 1.3 oster * 1. count the number of nodes in the DAG
572 1.3 oster * 2. create the nodes
573 1.3 oster * 3. initialize the nodes
574 1.3 oster * 4. connect the nodes
575 1.3 oster */
576 1.3 oster
577 1.3 oster /*
578 1.3 oster * Step 1. compute number of nodes in the graph
579 1.3 oster */
580 1.3 oster
581 1.14 oster /* number of nodes: a read and write for each data unit a
582 1.14 oster * redundancy computation node for each parity node (nfaults *
583 1.14 oster * nparity) a read and write for each parity unit a block and
584 1.14 oster * commit node (2) a terminate node if atomic RMW an unlock
585 1.34 martin * node for each data unit, redundancy unit
586 1.34 martin * totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
587 1.34 martin * + (nfaults * 2 * numParityNodes) + 3;
588 1.34 martin */
589 1.34 martin
590 1.3 oster /*
591 1.3 oster * Step 2. create the nodes
592 1.3 oster */
593 1.22 oster
594 1.22 oster blockNode = rf_AllocDAGNode();
595 1.22 oster blockNode->list_next = dag_h->nodes;
596 1.22 oster dag_h->nodes = blockNode;
597 1.22 oster
598 1.22 oster commitNode = rf_AllocDAGNode();
599 1.22 oster commitNode->list_next = dag_h->nodes;
600 1.22 oster dag_h->nodes = commitNode;
601 1.22 oster
602 1.22 oster for (i = 0; i < numDataNodes; i++) {
603 1.22 oster tmpNode = rf_AllocDAGNode();
604 1.22 oster tmpNode->list_next = dag_h->nodes;
605 1.22 oster dag_h->nodes = tmpNode;
606 1.22 oster }
607 1.22 oster readDataNodes = dag_h->nodes;
608 1.22 oster
609 1.22 oster for (i = 0; i < numParityNodes; i++) {
610 1.22 oster tmpNode = rf_AllocDAGNode();
611 1.22 oster tmpNode->list_next = dag_h->nodes;
612 1.22 oster dag_h->nodes = tmpNode;
613 1.22 oster }
614 1.22 oster readParityNodes = dag_h->nodes;
615 1.29 perry
616 1.22 oster for (i = 0; i < numDataNodes; i++) {
617 1.22 oster tmpNode = rf_AllocDAGNode();
618 1.22 oster tmpNode->list_next = dag_h->nodes;
619 1.22 oster dag_h->nodes = tmpNode;
620 1.22 oster }
621 1.22 oster writeDataNodes = dag_h->nodes;
622 1.22 oster
623 1.22 oster for (i = 0; i < numParityNodes; i++) {
624 1.22 oster tmpNode = rf_AllocDAGNode();
625 1.22 oster tmpNode->list_next = dag_h->nodes;
626 1.22 oster dag_h->nodes = tmpNode;
627 1.22 oster }
628 1.22 oster writeParityNodes = dag_h->nodes;
629 1.22 oster
630 1.22 oster for (i = 0; i < numParityNodes; i++) {
631 1.22 oster tmpNode = rf_AllocDAGNode();
632 1.22 oster tmpNode->list_next = dag_h->nodes;
633 1.22 oster dag_h->nodes = tmpNode;
634 1.22 oster }
635 1.22 oster xorNodes = dag_h->nodes;
636 1.22 oster
637 1.22 oster termNode = rf_AllocDAGNode();
638 1.22 oster termNode->list_next = dag_h->nodes;
639 1.22 oster dag_h->nodes = termNode;
640 1.16 oster
641 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
642 1.3 oster if (nfaults == 2) {
643 1.22 oster for (i = 0; i < numParityNodes; i++) {
644 1.22 oster tmpNode = rf_AllocDAGNode();
645 1.22 oster tmpNode->list_next = dag_h->nodes;
646 1.22 oster dag_h->nodes = tmpNode;
647 1.22 oster }
648 1.22 oster readQNodes = dag_h->nodes;
649 1.22 oster
650 1.22 oster for (i = 0; i < numParityNodes; i++) {
651 1.22 oster tmpNode = rf_AllocDAGNode();
652 1.22 oster tmpNode->list_next = dag_h->nodes;
653 1.22 oster dag_h->nodes = tmpNode;
654 1.22 oster }
655 1.22 oster writeQNodes = dag_h->nodes;
656 1.22 oster
657 1.22 oster for (i = 0; i < numParityNodes; i++) {
658 1.22 oster tmpNode = rf_AllocDAGNode();
659 1.22 oster tmpNode->list_next = dag_h->nodes;
660 1.22 oster dag_h->nodes = tmpNode;
661 1.22 oster }
662 1.22 oster qNodes = dag_h->nodes;
663 1.3 oster } else {
664 1.18 oster readQNodes = writeQNodes = qNodes = NULL;
665 1.3 oster }
666 1.20 oster #endif
667 1.3 oster
668 1.3 oster /*
669 1.3 oster * Step 3. initialize the nodes
670 1.3 oster */
671 1.3 oster /* initialize block node (Nil) */
672 1.3 oster nNodes = numDataNodes + (nfaults * numParityNodes);
673 1.29 perry rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
674 1.29 perry rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
675 1.14 oster dag_h, "Nil", allocList);
676 1.3 oster
677 1.3 oster /* initialize commit node (Cmt) */
678 1.29 perry rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
679 1.29 perry rf_NullNodeUndoFunc, NULL, nNodes,
680 1.14 oster (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
681 1.3 oster
682 1.3 oster /* initialize terminate node (Trm) */
683 1.29 perry rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
684 1.29 perry rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
685 1.14 oster dag_h, "Trm", allocList);
686 1.3 oster
687 1.3 oster /* initialize nodes which read old data (Rod) */
688 1.22 oster tmpreadDataNode = readDataNodes;
689 1.3 oster for (i = 0; i < numDataNodes; i++) {
690 1.29 perry rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
691 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
692 1.29 perry rf_GenericWakeupFunc, (nfaults * numParityNodes),
693 1.14 oster 1, 4, 0, dag_h, "Rod", allocList);
694 1.3 oster RF_ASSERT(pda != NULL);
695 1.3 oster /* physical disk addr desc */
696 1.22 oster tmpreadDataNode->params[0].p = pda;
697 1.3 oster /* buffer to hold old data */
698 1.27 oster tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
699 1.22 oster tmpreadDataNode->params[2].v = parityStripeID;
700 1.22 oster tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
701 1.17 oster which_ru);
702 1.3 oster pda = pda->next;
703 1.22 oster for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
704 1.22 oster tmpreadDataNode->propList[j] = NULL;
705 1.3 oster }
706 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
707 1.3 oster }
708 1.3 oster
709 1.3 oster /* initialize nodes which read old parity (Rop) */
710 1.3 oster pda = asmap->parityInfo;
711 1.3 oster i = 0;
712 1.22 oster tmpreadParityNode = readParityNodes;
713 1.3 oster for (i = 0; i < numParityNodes; i++) {
714 1.3 oster RF_ASSERT(pda != NULL);
715 1.29 perry rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
716 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
717 1.29 perry rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
718 1.14 oster dag_h, "Rop", allocList);
719 1.22 oster tmpreadParityNode->params[0].p = pda;
720 1.3 oster /* buffer to hold old parity */
721 1.27 oster tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
722 1.22 oster tmpreadParityNode->params[2].v = parityStripeID;
723 1.22 oster tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
724 1.17 oster which_ru);
725 1.3 oster pda = pda->next;
726 1.22 oster for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
727 1.22 oster tmpreadParityNode->propList[0] = NULL;
728 1.3 oster }
729 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
730 1.3 oster }
731 1.3 oster
732 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
733 1.3 oster /* initialize nodes which read old Q (Roq) */
734 1.3 oster if (nfaults == 2) {
735 1.3 oster pda = asmap->qInfo;
736 1.22 oster tmpreadQNode = readQNodes;
737 1.3 oster for (i = 0; i < numParityNodes; i++) {
738 1.3 oster RF_ASSERT(pda != NULL);
739 1.29 perry rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
740 1.14 oster rf_DiskReadFunc, rf_DiskReadUndoFunc,
741 1.29 perry rf_GenericWakeupFunc, numParityNodes,
742 1.14 oster 1, 4, 0, dag_h, "Roq", allocList);
743 1.22 oster tmpreadQNode->params[0].p = pda;
744 1.3 oster /* buffer to hold old Q */
745 1.24 oster tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
746 1.24 oster pda->numSector << raidPtr->logBytesPerSector);
747 1.22 oster tmpreadQNode->params[2].v = parityStripeID;
748 1.22 oster tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
749 1.17 oster which_ru);
750 1.3 oster pda = pda->next;
751 1.22 oster for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
752 1.22 oster tmpreadQNode->propList[0] = NULL;
753 1.3 oster }
754 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
755 1.3 oster }
756 1.3 oster }
757 1.20 oster #endif
758 1.3 oster /* initialize nodes which write new data (Wnd) */
759 1.3 oster pda = asmap->physInfo;
760 1.22 oster tmpwriteDataNode = writeDataNodes;
761 1.3 oster for (i = 0; i < numDataNodes; i++) {
762 1.3 oster RF_ASSERT(pda != NULL);
763 1.29 perry rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
764 1.29 perry rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
765 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
766 1.14 oster "Wnd", allocList);
767 1.3 oster /* physical disk addr desc */
768 1.22 oster tmpwriteDataNode->params[0].p = pda;
769 1.3 oster /* buffer holding new data to be written */
770 1.22 oster tmpwriteDataNode->params[1].p = pda->bufPtr;
771 1.22 oster tmpwriteDataNode->params[2].v = parityStripeID;
772 1.22 oster tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
773 1.17 oster which_ru);
774 1.3 oster pda = pda->next;
775 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
776 1.3 oster }
777 1.3 oster
778 1.3 oster /*
779 1.3 oster * Initialize nodes which compute new parity and Q.
780 1.3 oster */
781 1.3 oster /*
782 1.3 oster * We use the simple XOR func in the double-XOR case, and when
783 1.14 oster * we're accessing only a portion of one stripe unit. The
784 1.14 oster * distinction between the two is that the regular XOR func
785 1.14 oster * assumes that the targbuf is a full SU in size, and examines
786 1.14 oster * the pda associated with the buffer to decide where within
787 1.14 oster * the buffer to XOR the data, whereas the simple XOR func
788 1.14 oster * just XORs the data into the start of the buffer. */
789 1.3 oster if ((numParityNodes == 2) || ((numDataNodes == 1)
790 1.29 perry && (asmap->totalSectorsAccessed <
791 1.14 oster raidPtr->Layout.sectorsPerStripeUnit))) {
792 1.3 oster func = pfuncs->simple;
793 1.3 oster undoFunc = rf_NullNodeUndoFunc;
794 1.3 oster name = pfuncs->SimpleName;
795 1.3 oster if (qfuncs) {
796 1.3 oster qfunc = qfuncs->simple;
797 1.3 oster qname = qfuncs->SimpleName;
798 1.3 oster } else {
799 1.3 oster qfunc = NULL;
800 1.3 oster qname = NULL;
801 1.3 oster }
802 1.3 oster } else {
803 1.3 oster func = pfuncs->regular;
804 1.3 oster undoFunc = rf_NullNodeUndoFunc;
805 1.3 oster name = pfuncs->RegularName;
806 1.3 oster if (qfuncs) {
807 1.3 oster qfunc = qfuncs->regular;
808 1.3 oster qname = qfuncs->RegularName;
809 1.3 oster } else {
810 1.3 oster qfunc = NULL;
811 1.3 oster qname = NULL;
812 1.3 oster }
813 1.3 oster }
814 1.3 oster /*
815 1.3 oster * Initialize the xor nodes: params are {pda,buf}
816 1.3 oster * from {Rod,Wnd,Rop} nodes, and raidPtr
817 1.3 oster */
818 1.3 oster if (numParityNodes == 2) {
819 1.3 oster /* double-xor case */
820 1.22 oster tmpxorNode = xorNodes;
821 1.22 oster tmpreadDataNode = readDataNodes;
822 1.22 oster tmpreadParityNode = readParityNodes;
823 1.22 oster tmpwriteDataNode = writeDataNodes;
824 1.34 martin #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
825 1.22 oster tmpqNode = qNodes;
826 1.22 oster tmpreadQNode = readQNodes;
827 1.34 martin #endif
828 1.3 oster for (i = 0; i < numParityNodes; i++) {
829 1.3 oster /* note: no wakeup func for xor */
830 1.29 perry rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
831 1.29 perry undoFunc, NULL, 1,
832 1.29 perry (numDataNodes + numParityNodes),
833 1.14 oster 7, 1, dag_h, name, allocList);
834 1.22 oster tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
835 1.22 oster tmpxorNode->params[0] = tmpreadDataNode->params[0];
836 1.22 oster tmpxorNode->params[1] = tmpreadDataNode->params[1];
837 1.22 oster tmpxorNode->params[2] = tmpreadParityNode->params[0];
838 1.22 oster tmpxorNode->params[3] = tmpreadParityNode->params[1];
839 1.22 oster tmpxorNode->params[4] = tmpwriteDataNode->params[0];
840 1.22 oster tmpxorNode->params[5] = tmpwriteDataNode->params[1];
841 1.22 oster tmpxorNode->params[6].p = raidPtr;
842 1.3 oster /* use old parity buf as target buf */
843 1.22 oster tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
844 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
845 1.3 oster if (nfaults == 2) {
846 1.3 oster /* note: no wakeup func for qor */
847 1.29 perry rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
848 1.14 oster qfunc, undoFunc, NULL, 1,
849 1.29 perry (numDataNodes + numParityNodes),
850 1.14 oster 7, 1, dag_h, qname, allocList);
851 1.22 oster tmpqNode->params[0] = tmpreadDataNode->params[0];
852 1.22 oster tmpqNode->params[1] = tmpreadDataNode->params[1];
853 1.22 oster tmpqNode->params[2] = tmpreadQNode->.params[0];
854 1.22 oster tmpqNode->params[3] = tmpreadQNode->params[1];
855 1.22 oster tmpqNode->params[4] = tmpwriteDataNode->params[0];
856 1.22 oster tmpqNode->params[5] = tmpwriteDataNode->params[1];
857 1.22 oster tmpqNode->params[6].p = raidPtr;
858 1.3 oster /* use old Q buf as target buf */
859 1.22 oster tmpqNode->results[0] = tmpreadQNode->params[1].p;
860 1.22 oster tmpqNode = tmpqNode->list_next;
861 1.22 oster tmpreadQNodes = tmpreadQNodes->list_next;
862 1.3 oster }
863 1.20 oster #endif
864 1.22 oster tmpxorNode = tmpxorNode->list_next;
865 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
866 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
867 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
868 1.3 oster }
869 1.3 oster } else {
870 1.3 oster /* there is only one xor node in this case */
871 1.29 perry rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
872 1.14 oster undoFunc, NULL, 1, (numDataNodes + numParityNodes),
873 1.29 perry (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
874 1.14 oster dag_h, name, allocList);
875 1.22 oster xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
876 1.22 oster tmpreadDataNode = readDataNodes;
877 1.29 perry for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
878 1.22 oster out the "+1" into the "deal with Rop separately below */
879 1.22 oster /* set up params related to Rod nodes */
880 1.22 oster xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
881 1.22 oster xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
882 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
883 1.22 oster }
884 1.22 oster /* deal with Rop separately */
885 1.22 oster xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0]; /* pda */
886 1.22 oster xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1]; /* buffer ptr */
887 1.22 oster
888 1.22 oster tmpwriteDataNode = writeDataNodes;
889 1.3 oster for (i = 0; i < numDataNodes; i++) {
890 1.3 oster /* set up params related to Wnd and Wnp nodes */
891 1.22 oster xorNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
892 1.22 oster tmpwriteDataNode->params[0];
893 1.22 oster xorNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
894 1.22 oster tmpwriteDataNode->params[1];
895 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
896 1.3 oster }
897 1.3 oster /* xor node needs to get at RAID information */
898 1.22 oster xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
899 1.22 oster xorNodes->results[0] = readParityNodes->params[1].p;
900 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
901 1.3 oster if (nfaults == 2) {
902 1.29 perry rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
903 1.14 oster undoFunc, NULL, 1,
904 1.14 oster (numDataNodes + numParityNodes),
905 1.14 oster (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
906 1.14 oster dag_h, qname, allocList);
907 1.22 oster tmpreadDataNode = readDataNodes;
908 1.3 oster for (i = 0; i < numDataNodes; i++) {
909 1.3 oster /* set up params related to Rod */
910 1.22 oster qNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
911 1.22 oster qNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
912 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
913 1.3 oster }
914 1.3 oster /* and read old q */
915 1.22 oster qNodes->params[2 * numDataNodes + 0] = /* pda */
916 1.22 oster readQNodes->params[0];
917 1.22 oster qNodes->params[2 * numDataNodes + 1] = /* buffer ptr */
918 1.22 oster readQNodes->params[1];
919 1.22 oster tmpwriteDataNode = writeDataNodes;
920 1.3 oster for (i = 0; i < numDataNodes; i++) {
921 1.3 oster /* set up params related to Wnd nodes */
922 1.22 oster qNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
923 1.22 oster tmpwriteDataNode->params[0];
924 1.22 oster qNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
925 1.22 oster tmpwriteDataNode->params[1];
926 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
927 1.3 oster }
928 1.3 oster /* xor node needs to get at RAID information */
929 1.22 oster qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
930 1.22 oster qNodes->results[0] = readQNodes->params[1].p;
931 1.3 oster }
932 1.20 oster #endif
933 1.3 oster }
934 1.3 oster
935 1.3 oster /* initialize nodes which write new parity (Wnp) */
936 1.3 oster pda = asmap->parityInfo;
937 1.22 oster tmpwriteParityNode = writeParityNodes;
938 1.22 oster tmpxorNode = xorNodes;
939 1.3 oster for (i = 0; i < numParityNodes; i++) {
940 1.29 perry rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
941 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
942 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
943 1.14 oster "Wnp", allocList);
944 1.3 oster RF_ASSERT(pda != NULL);
945 1.22 oster tmpwriteParityNode->params[0].p = pda; /* param 1 (bufPtr)
946 1.22 oster * filled in by xor node */
947 1.22 oster tmpwriteParityNode->params[1].p = tmpxorNode->results[0]; /* buffer pointer for
948 1.22 oster * parity write
949 1.22 oster * operation */
950 1.22 oster tmpwriteParityNode->params[2].v = parityStripeID;
951 1.22 oster tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
952 1.17 oster which_ru);
953 1.3 oster pda = pda->next;
954 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
955 1.22 oster tmpxorNode = tmpxorNode->list_next;
956 1.3 oster }
957 1.3 oster
958 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
959 1.3 oster /* initialize nodes which write new Q (Wnq) */
960 1.3 oster if (nfaults == 2) {
961 1.3 oster pda = asmap->qInfo;
962 1.22 oster tmpwriteQNode = writeQNodes;
963 1.22 oster tmpqNode = qNodes;
964 1.3 oster for (i = 0; i < numParityNodes; i++) {
965 1.29 perry rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
966 1.29 perry rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
967 1.14 oster rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
968 1.14 oster "Wnq", allocList);
969 1.3 oster RF_ASSERT(pda != NULL);
970 1.22 oster tmpwriteQNode->params[0].p = pda; /* param 1 (bufPtr)
971 1.3 oster * filled in by xor node */
972 1.22 oster tmpwriteQNode->params[1].p = tmpqNode->results[0]; /* buffer pointer for
973 1.3 oster * parity write
974 1.3 oster * operation */
975 1.22 oster tmpwriteQNode->params[2].v = parityStripeID;
976 1.22 oster tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
977 1.17 oster which_ru);
978 1.3 oster pda = pda->next;
979 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
980 1.22 oster tmpqNode = tmpqNode->list_next;
981 1.3 oster }
982 1.3 oster }
983 1.20 oster #endif
984 1.3 oster /*
985 1.3 oster * Step 4. connect the nodes.
986 1.3 oster */
987 1.3 oster
988 1.3 oster /* connect header to block node */
989 1.3 oster dag_h->succedents[0] = blockNode;
990 1.3 oster
991 1.3 oster /* connect block node to read old data nodes */
992 1.3 oster RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
993 1.22 oster tmpreadDataNode = readDataNodes;
994 1.3 oster for (i = 0; i < numDataNodes; i++) {
995 1.22 oster blockNode->succedents[i] = tmpreadDataNode;
996 1.22 oster RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
997 1.22 oster tmpreadDataNode->antecedents[0] = blockNode;
998 1.22 oster tmpreadDataNode->antType[0] = rf_control;
999 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1000 1.3 oster }
1001 1.3 oster
1002 1.3 oster /* connect block node to read old parity nodes */
1003 1.22 oster tmpreadParityNode = readParityNodes;
1004 1.3 oster for (i = 0; i < numParityNodes; i++) {
1005 1.22 oster blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1006 1.22 oster RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1007 1.22 oster tmpreadParityNode->antecedents[0] = blockNode;
1008 1.22 oster tmpreadParityNode->antType[0] = rf_control;
1009 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1010 1.3 oster }
1011 1.3 oster
1012 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1013 1.3 oster /* connect block node to read old Q nodes */
1014 1.3 oster if (nfaults == 2) {
1015 1.22 oster tmpreadQNode = readQNodes;
1016 1.3 oster for (i = 0; i < numParityNodes; i++) {
1017 1.22 oster blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1018 1.22 oster RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1019 1.22 oster tmpreadQNode->antecedents[0] = blockNode;
1020 1.22 oster tmpreadQNode->antType[0] = rf_control;
1021 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
1022 1.3 oster }
1023 1.3 oster }
1024 1.20 oster #endif
1025 1.3 oster /* connect read old data nodes to xor nodes */
1026 1.22 oster tmpreadDataNode = readDataNodes;
1027 1.3 oster for (i = 0; i < numDataNodes; i++) {
1028 1.22 oster RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1029 1.22 oster tmpxorNode = xorNodes;
1030 1.3 oster for (j = 0; j < numParityNodes; j++) {
1031 1.22 oster RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1032 1.22 oster tmpreadDataNode->succedents[j] = tmpxorNode;
1033 1.22 oster tmpxorNode->antecedents[i] = tmpreadDataNode;
1034 1.22 oster tmpxorNode->antType[i] = rf_trueData;
1035 1.22 oster tmpxorNode = tmpxorNode->list_next;
1036 1.3 oster }
1037 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1038 1.3 oster }
1039 1.3 oster
1040 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1041 1.3 oster /* connect read old data nodes to q nodes */
1042 1.3 oster if (nfaults == 2) {
1043 1.22 oster tmpreadDataNode = readDataNodes;
1044 1.3 oster for (i = 0; i < numDataNodes; i++) {
1045 1.22 oster tmpqNode = qNodes;
1046 1.3 oster for (j = 0; j < numParityNodes; j++) {
1047 1.22 oster RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1048 1.22 oster tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1049 1.22 oster tmpqNode->antecedents[i] = tmpreadDataNode;
1050 1.22 oster tmpqNode->antType[i] = rf_trueData;
1051 1.22 oster tmpqNode = tmpqNode->list_next;
1052 1.3 oster }
1053 1.22 oster tmpreadDataNode = tmpreadDataNode->list_next;
1054 1.3 oster }
1055 1.3 oster }
1056 1.20 oster #endif
1057 1.3 oster /* connect read old parity nodes to xor nodes */
1058 1.22 oster tmpreadParityNode = readParityNodes;
1059 1.3 oster for (i = 0; i < numParityNodes; i++) {
1060 1.22 oster RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1061 1.22 oster tmpxorNode = xorNodes;
1062 1.3 oster for (j = 0; j < numParityNodes; j++) {
1063 1.22 oster tmpreadParityNode->succedents[j] = tmpxorNode;
1064 1.22 oster tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1065 1.22 oster tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1066 1.22 oster tmpxorNode = tmpxorNode->list_next;
1067 1.3 oster }
1068 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1069 1.3 oster }
1070 1.3 oster
1071 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1072 1.3 oster /* connect read old q nodes to q nodes */
1073 1.3 oster if (nfaults == 2) {
1074 1.22 oster tmpreadParityNode = readParityNodes;
1075 1.22 oster tmpreadQNode = readQNodes;
1076 1.3 oster for (i = 0; i < numParityNodes; i++) {
1077 1.22 oster RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1078 1.22 oster tmpqNode = qNodes;
1079 1.3 oster for (j = 0; j < numParityNodes; j++) {
1080 1.22 oster tmpreadQNode->succedents[j] = tmpqNode;
1081 1.22 oster tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1082 1.22 oster tmpqNode->antType[numDataNodes + i] = rf_trueData;
1083 1.22 oster tmpqNode = tmpqNode->list_next;
1084 1.3 oster }
1085 1.22 oster tmpreadParityNode = tmpreadParityNode->list_next;
1086 1.22 oster tmpreadQNode = tmpreadQNode->list_next;
1087 1.3 oster }
1088 1.3 oster }
1089 1.20 oster #endif
1090 1.3 oster /* connect xor nodes to commit node */
1091 1.3 oster RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1092 1.22 oster tmpxorNode = xorNodes;
1093 1.3 oster for (i = 0; i < numParityNodes; i++) {
1094 1.22 oster RF_ASSERT(tmpxorNode->numSuccedents == 1);
1095 1.22 oster tmpxorNode->succedents[0] = commitNode;
1096 1.22 oster commitNode->antecedents[i] = tmpxorNode;
1097 1.3 oster commitNode->antType[i] = rf_control;
1098 1.22 oster tmpxorNode = tmpxorNode->list_next;
1099 1.3 oster }
1100 1.3 oster
1101 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1102 1.3 oster /* connect q nodes to commit node */
1103 1.3 oster if (nfaults == 2) {
1104 1.22 oster tmpqNode = qNodes;
1105 1.3 oster for (i = 0; i < numParityNodes; i++) {
1106 1.22 oster RF_ASSERT(tmpqNode->numSuccedents == 1);
1107 1.22 oster tmpqNode->succedents[0] = commitNode;
1108 1.22 oster commitNode->antecedents[i + numParityNodes] = tmpqNode;
1109 1.3 oster commitNode->antType[i + numParityNodes] = rf_control;
1110 1.22 oster tmpqNode = tmpqNode->list_next;
1111 1.3 oster }
1112 1.3 oster }
1113 1.20 oster #endif
1114 1.3 oster /* connect commit node to write nodes */
1115 1.3 oster RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1116 1.22 oster tmpwriteDataNode = writeDataNodes;
1117 1.3 oster for (i = 0; i < numDataNodes; i++) {
1118 1.28 oster RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1119 1.22 oster commitNode->succedents[i] = tmpwriteDataNode;
1120 1.22 oster tmpwriteDataNode->antecedents[0] = commitNode;
1121 1.22 oster tmpwriteDataNode->antType[0] = rf_trueData;
1122 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
1123 1.3 oster }
1124 1.22 oster tmpwriteParityNode = writeParityNodes;
1125 1.3 oster for (i = 0; i < numParityNodes; i++) {
1126 1.22 oster RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1127 1.22 oster commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1128 1.22 oster tmpwriteParityNode->antecedents[0] = commitNode;
1129 1.22 oster tmpwriteParityNode->antType[0] = rf_trueData;
1130 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
1131 1.3 oster }
1132 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1133 1.3 oster if (nfaults == 2) {
1134 1.22 oster tmpwriteQNode = writeQNodes;
1135 1.3 oster for (i = 0; i < numParityNodes; i++) {
1136 1.22 oster RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1137 1.22 oster commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1138 1.22 oster tmpwriteQNode->antecedents[0] = commitNode;
1139 1.22 oster tmpwriteQNode->antType[0] = rf_trueData;
1140 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
1141 1.3 oster }
1142 1.3 oster }
1143 1.20 oster #endif
1144 1.3 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1145 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1146 1.22 oster tmpwriteDataNode = writeDataNodes;
1147 1.3 oster for (i = 0; i < numDataNodes; i++) {
1148 1.16 oster /* connect write new data nodes to term node */
1149 1.22 oster RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1150 1.16 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1151 1.22 oster tmpwriteDataNode->succedents[0] = termNode;
1152 1.22 oster termNode->antecedents[i] = tmpwriteDataNode;
1153 1.16 oster termNode->antType[i] = rf_control;
1154 1.22 oster tmpwriteDataNode = tmpwriteDataNode->list_next;
1155 1.3 oster }
1156 1.3 oster
1157 1.22 oster tmpwriteParityNode = writeParityNodes;
1158 1.3 oster for (i = 0; i < numParityNodes; i++) {
1159 1.22 oster RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1160 1.22 oster tmpwriteParityNode->succedents[0] = termNode;
1161 1.22 oster termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1162 1.16 oster termNode->antType[numDataNodes + i] = rf_control;
1163 1.22 oster tmpwriteParityNode = tmpwriteParityNode->list_next;
1164 1.3 oster }
1165 1.3 oster
1166 1.20 oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1167 1.3 oster if (nfaults == 2) {
1168 1.22 oster tmpwriteQNode = writeQNodes;
1169 1.3 oster for (i = 0; i < numParityNodes; i++) {
1170 1.22 oster RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1171 1.22 oster tmpwriteQNode->succedents[0] = termNode;
1172 1.22 oster termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1173 1.16 oster termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1174 1.22 oster tmpwriteQNode = tmpwriteQNode->list_next;
1175 1.3 oster }
1176 1.3 oster }
1177 1.20 oster #endif
1178 1.1 oster }
1179 1.1 oster
1180 1.1 oster
1181 1.1 oster /******************************************************************************
1182 1.1 oster * create a write graph (fault-free or degraded) for RAID level 1
1183 1.1 oster *
1184 1.1 oster * Hdr -> Commit -> Wpd -> Nil -> Trm
1185 1.1 oster * -> Wsd ->
1186 1.1 oster *
1187 1.1 oster * The "Wpd" node writes data to the primary copy in the mirror pair
1188 1.1 oster * The "Wsd" node writes data to the secondary copy in the mirror pair
1189 1.1 oster *
1190 1.1 oster * Parameters: raidPtr - description of the physical array
1191 1.1 oster * asmap - logical & physical addresses for this access
1192 1.1 oster * bp - buffer ptr (holds write data)
1193 1.3 oster * flags - general flags (e.g. disk locking)
1194 1.1 oster * allocList - list of memory allocated in DAG creation
1195 1.1 oster *****************************************************************************/
1196 1.1 oster
1197 1.29 perry void
1198 1.13 oster rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1199 1.33 christos RF_DagHeader_t *dag_h, void *bp,
1200 1.33 christos RF_RaidAccessFlags_t flags,
1201 1.13 oster RF_AllocListElem_t *allocList)
1202 1.1 oster {
1203 1.3 oster RF_DagNode_t *unblockNode, *termNode, *commitNode;
1204 1.22 oster RF_DagNode_t *wndNode, *wmirNode;
1205 1.22 oster RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1206 1.3 oster int nWndNodes, nWmirNodes, i;
1207 1.3 oster RF_ReconUnitNum_t which_ru;
1208 1.3 oster RF_PhysDiskAddr_t *pda, *pdaP;
1209 1.3 oster RF_StripeNum_t parityStripeID;
1210 1.3 oster
1211 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1212 1.3 oster asmap->raidAddress, &which_ru);
1213 1.19 oster #if RF_DEBUG_DAG
1214 1.3 oster if (rf_dagDebug) {
1215 1.3 oster printf("[Creating RAID level 1 write DAG]\n");
1216 1.3 oster }
1217 1.19 oster #endif
1218 1.3 oster dag_h->creator = "RaidOneWriteDAG";
1219 1.3 oster
1220 1.3 oster /* 2 implies access not SU aligned */
1221 1.3 oster nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1222 1.3 oster nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1223 1.3 oster
1224 1.3 oster /* alloc the Wnd nodes and the Wmir node */
1225 1.3 oster if (asmap->numDataFailed == 1)
1226 1.3 oster nWndNodes--;
1227 1.3 oster if (asmap->numParityFailed == 1)
1228 1.3 oster nWmirNodes--;
1229 1.3 oster
1230 1.3 oster /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1231 1.3 oster * + terminator) */
1232 1.22 oster for (i = 0; i < nWndNodes; i++) {
1233 1.22 oster tmpNode = rf_AllocDAGNode();
1234 1.22 oster tmpNode->list_next = dag_h->nodes;
1235 1.22 oster dag_h->nodes = tmpNode;
1236 1.22 oster }
1237 1.22 oster wndNode = dag_h->nodes;
1238 1.22 oster
1239 1.22 oster for (i = 0; i < nWmirNodes; i++) {
1240 1.22 oster tmpNode = rf_AllocDAGNode();
1241 1.22 oster tmpNode->list_next = dag_h->nodes;
1242 1.22 oster dag_h->nodes = tmpNode;
1243 1.22 oster }
1244 1.22 oster wmirNode = dag_h->nodes;
1245 1.22 oster
1246 1.22 oster commitNode = rf_AllocDAGNode();
1247 1.22 oster commitNode->list_next = dag_h->nodes;
1248 1.22 oster dag_h->nodes = commitNode;
1249 1.22 oster
1250 1.22 oster unblockNode = rf_AllocDAGNode();
1251 1.22 oster unblockNode->list_next = dag_h->nodes;
1252 1.22 oster dag_h->nodes = unblockNode;
1253 1.22 oster
1254 1.22 oster termNode = rf_AllocDAGNode();
1255 1.22 oster termNode->list_next = dag_h->nodes;
1256 1.22 oster dag_h->nodes = termNode;
1257 1.3 oster
1258 1.3 oster /* this dag can commit immediately */
1259 1.3 oster dag_h->numCommitNodes = 1;
1260 1.3 oster dag_h->numCommits = 0;
1261 1.3 oster dag_h->numSuccedents = 1;
1262 1.3 oster
1263 1.3 oster /* initialize the commit, unblock, and term nodes */
1264 1.29 perry rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1265 1.29 perry rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1266 1.14 oster 0, 0, 0, dag_h, "Cmt", allocList);
1267 1.29 perry rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1268 1.29 perry rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1269 1.14 oster 0, 0, dag_h, "Nil", allocList);
1270 1.29 perry rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1271 1.29 perry rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1272 1.14 oster dag_h, "Trm", allocList);
1273 1.3 oster
1274 1.3 oster /* initialize the wnd nodes */
1275 1.3 oster if (nWndNodes > 0) {
1276 1.3 oster pda = asmap->physInfo;
1277 1.22 oster tmpwndNode = wndNode;
1278 1.3 oster for (i = 0; i < nWndNodes; i++) {
1279 1.29 perry rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1280 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1281 1.29 perry rf_GenericWakeupFunc, 1, 1, 4, 0,
1282 1.14 oster dag_h, "Wpd", allocList);
1283 1.3 oster RF_ASSERT(pda != NULL);
1284 1.22 oster tmpwndNode->params[0].p = pda;
1285 1.22 oster tmpwndNode->params[1].p = pda->bufPtr;
1286 1.22 oster tmpwndNode->params[2].v = parityStripeID;
1287 1.22 oster tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1288 1.3 oster pda = pda->next;
1289 1.22 oster tmpwndNode = tmpwndNode->list_next;
1290 1.3 oster }
1291 1.3 oster RF_ASSERT(pda == NULL);
1292 1.3 oster }
1293 1.3 oster /* initialize the mirror nodes */
1294 1.3 oster if (nWmirNodes > 0) {
1295 1.3 oster pda = asmap->physInfo;
1296 1.3 oster pdaP = asmap->parityInfo;
1297 1.22 oster tmpwmirNode = wmirNode;
1298 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1299 1.29 perry rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1300 1.14 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1301 1.29 perry rf_GenericWakeupFunc, 1, 1, 4, 0,
1302 1.14 oster dag_h, "Wsd", allocList);
1303 1.3 oster RF_ASSERT(pda != NULL);
1304 1.22 oster tmpwmirNode->params[0].p = pdaP;
1305 1.22 oster tmpwmirNode->params[1].p = pda->bufPtr;
1306 1.22 oster tmpwmirNode->params[2].v = parityStripeID;
1307 1.22 oster tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1308 1.3 oster pda = pda->next;
1309 1.3 oster pdaP = pdaP->next;
1310 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1311 1.3 oster }
1312 1.3 oster RF_ASSERT(pda == NULL);
1313 1.3 oster RF_ASSERT(pdaP == NULL);
1314 1.3 oster }
1315 1.3 oster /* link the header node to the commit node */
1316 1.3 oster RF_ASSERT(dag_h->numSuccedents == 1);
1317 1.3 oster RF_ASSERT(commitNode->numAntecedents == 0);
1318 1.3 oster dag_h->succedents[0] = commitNode;
1319 1.3 oster
1320 1.3 oster /* link the commit node to the write nodes */
1321 1.3 oster RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1322 1.22 oster tmpwndNode = wndNode;
1323 1.3 oster for (i = 0; i < nWndNodes; i++) {
1324 1.22 oster RF_ASSERT(tmpwndNode->numAntecedents == 1);
1325 1.22 oster commitNode->succedents[i] = tmpwndNode;
1326 1.22 oster tmpwndNode->antecedents[0] = commitNode;
1327 1.22 oster tmpwndNode->antType[0] = rf_control;
1328 1.22 oster tmpwndNode = tmpwndNode->list_next;
1329 1.3 oster }
1330 1.22 oster tmpwmirNode = wmirNode;
1331 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1332 1.22 oster RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1333 1.22 oster commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1334 1.22 oster tmpwmirNode->antecedents[0] = commitNode;
1335 1.22 oster tmpwmirNode->antType[0] = rf_control;
1336 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1337 1.3 oster }
1338 1.3 oster
1339 1.3 oster /* link the write nodes to the unblock node */
1340 1.3 oster RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1341 1.22 oster tmpwndNode = wndNode;
1342 1.3 oster for (i = 0; i < nWndNodes; i++) {
1343 1.22 oster RF_ASSERT(tmpwndNode->numSuccedents == 1);
1344 1.22 oster tmpwndNode->succedents[0] = unblockNode;
1345 1.22 oster unblockNode->antecedents[i] = tmpwndNode;
1346 1.3 oster unblockNode->antType[i] = rf_control;
1347 1.22 oster tmpwndNode = tmpwndNode->list_next;
1348 1.3 oster }
1349 1.22 oster tmpwmirNode = wmirNode;
1350 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1351 1.22 oster RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1352 1.22 oster tmpwmirNode->succedents[0] = unblockNode;
1353 1.22 oster unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1354 1.3 oster unblockNode->antType[i + nWndNodes] = rf_control;
1355 1.22 oster tmpwmirNode = tmpwmirNode->list_next;
1356 1.3 oster }
1357 1.3 oster
1358 1.3 oster /* link the unblock node to the term node */
1359 1.3 oster RF_ASSERT(unblockNode->numSuccedents == 1);
1360 1.3 oster RF_ASSERT(termNode->numAntecedents == 1);
1361 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1362 1.3 oster unblockNode->succedents[0] = termNode;
1363 1.3 oster termNode->antecedents[0] = unblockNode;
1364 1.3 oster termNode->antType[0] = rf_control;
1365 1.1 oster }
1366