rf_dagffwr.c revision 1.11.2.6 1 1.11.2.6 skrll /* $NetBSD: rf_dagffwr.c,v 1.11.2.6 2005/11/10 14:07:40 skrll Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*
30 1.1 oster * rf_dagffwr.c
31 1.1 oster *
32 1.1 oster * code for creating fault-free DAGs
33 1.1 oster *
34 1.1 oster */
35 1.7 lukem
36 1.7 lukem #include <sys/cdefs.h>
37 1.11.2.6 skrll __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.11.2.6 2005/11/10 14:07:40 skrll Exp $");
38 1.1 oster
39 1.6 oster #include <dev/raidframe/raidframevar.h>
40 1.6 oster
41 1.1 oster #include "rf_raid.h"
42 1.1 oster #include "rf_dag.h"
43 1.1 oster #include "rf_dagutils.h"
44 1.1 oster #include "rf_dagfuncs.h"
45 1.1 oster #include "rf_debugMem.h"
46 1.1 oster #include "rf_dagffrd.h"
47 1.1 oster #include "rf_general.h"
48 1.1 oster #include "rf_dagffwr.h"
49 1.11.2.1 skrll #include "rf_map.h"
50 1.1 oster
51 1.1 oster /******************************************************************************
52 1.1 oster *
53 1.1 oster * General comments on DAG creation:
54 1.3 oster *
55 1.1 oster * All DAGs in this file use roll-away error recovery. Each DAG has a single
56 1.1 oster * commit node, usually called "Cmt." If an error occurs before the Cmt node
57 1.1 oster * is reached, the execution engine will halt forward execution and work
58 1.1 oster * backward through the graph, executing the undo functions. Assuming that
59 1.1 oster * each node in the graph prior to the Cmt node is undoable and atomic - or -
60 1.1 oster * does not make changes to permanent state, the graph will fail atomically.
61 1.1 oster * If an error occurs after the Cmt node executes, the engine will roll-forward
62 1.1 oster * through the graph, blindly executing nodes until it reaches the end.
63 1.1 oster * If a graph reaches the end, it is assumed to have completed successfully.
64 1.1 oster *
65 1.1 oster * A graph has only 1 Cmt node.
66 1.1 oster *
67 1.1 oster */
68 1.1 oster
69 1.1 oster
70 1.1 oster /******************************************************************************
71 1.1 oster *
72 1.1 oster * The following wrappers map the standard DAG creation interface to the
73 1.1 oster * DAG creation routines. Additionally, these wrappers enable experimentation
74 1.1 oster * with new DAG structures by providing an extra level of indirection, allowing
75 1.1 oster * the DAG creation routines to be replaced at this single point.
76 1.1 oster */
77 1.1 oster
78 1.1 oster
79 1.11.2.5 skrll void
80 1.11.2.1 skrll rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81 1.11.2.1 skrll RF_DagHeader_t *dag_h, void *bp,
82 1.11.2.1 skrll RF_RaidAccessFlags_t flags,
83 1.11.2.1 skrll RF_AllocListElem_t *allocList,
84 1.11.2.1 skrll RF_IoType_t type)
85 1.1 oster {
86 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87 1.11.2.1 skrll RF_IO_TYPE_WRITE);
88 1.1 oster }
89 1.1 oster
90 1.11.2.5 skrll void
91 1.11.2.1 skrll rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92 1.11.2.1 skrll RF_DagHeader_t *dag_h, void *bp,
93 1.11.2.1 skrll RF_RaidAccessFlags_t flags,
94 1.11.2.1 skrll RF_AllocListElem_t *allocList,
95 1.11.2.1 skrll RF_IoType_t type)
96 1.1 oster {
97 1.3 oster rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98 1.11.2.1 skrll RF_IO_TYPE_WRITE);
99 1.1 oster }
100 1.1 oster
101 1.11.2.5 skrll void
102 1.11.2.1 skrll rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103 1.11.2.1 skrll RF_DagHeader_t *dag_h, void *bp,
104 1.11.2.1 skrll RF_RaidAccessFlags_t flags,
105 1.11.2.1 skrll RF_AllocListElem_t *allocList)
106 1.1 oster {
107 1.3 oster /* "normal" rollaway */
108 1.11.2.5 skrll rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109 1.11.2.1 skrll allocList, &rf_xorFuncs, NULL);
110 1.1 oster }
111 1.1 oster
112 1.11.2.5 skrll void
113 1.11.2.1 skrll rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114 1.11.2.1 skrll RF_DagHeader_t *dag_h, void *bp,
115 1.11.2.1 skrll RF_RaidAccessFlags_t flags,
116 1.11.2.1 skrll RF_AllocListElem_t *allocList)
117 1.1 oster {
118 1.3 oster /* "normal" rollaway */
119 1.11.2.5 skrll rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120 1.11.2.1 skrll allocList, 1, rf_RegularXorFunc, RF_TRUE);
121 1.1 oster }
122 1.1 oster
123 1.1 oster
124 1.1 oster /******************************************************************************
125 1.1 oster *
126 1.1 oster * DAG creation code begins here
127 1.1 oster */
128 1.1 oster
129 1.1 oster
130 1.1 oster /******************************************************************************
131 1.1 oster *
132 1.1 oster * creates a DAG to perform a large-write operation:
133 1.1 oster *
134 1.1 oster * / Rod \ / Wnd \
135 1.1 oster * H -- block- Rod - Xor - Cmt - Wnd --- T
136 1.1 oster * \ Rod / \ Wnp /
137 1.1 oster * \[Wnq]/
138 1.1 oster *
139 1.1 oster * The XOR node also does the Q calculation in the P+Q architecture.
140 1.1 oster * All nodes before the commit node (Cmt) are assumed to be atomic and
141 1.1 oster * undoable - or - they make no changes to permanent state.
142 1.1 oster *
143 1.1 oster * Rod = read old data
144 1.1 oster * Cmt = commit node
145 1.1 oster * Wnp = write new parity
146 1.1 oster * Wnd = write new data
147 1.1 oster * Wnq = write new "q"
148 1.1 oster * [] denotes optional segments in the graph
149 1.1 oster *
150 1.1 oster * Parameters: raidPtr - description of the physical array
151 1.1 oster * asmap - logical & physical addresses for this access
152 1.1 oster * bp - buffer ptr (holds write data)
153 1.3 oster * flags - general flags (e.g. disk locking)
154 1.1 oster * allocList - list of memory allocated in DAG creation
155 1.1 oster * nfaults - number of faults array can tolerate
156 1.1 oster * (equal to # redundancy units in stripe)
157 1.1 oster * redFunc - redundancy generating function (executed by the Xor node)
158 1.1 oster *
159 1.1 oster *****************************************************************************/
160 1.1 oster
161 1.11.2.5 skrll void
162 1.11.2.1 skrll rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
163 1.11.2.1 skrll RF_DagHeader_t *dag_h, void *bp,
164 1.11.2.1 skrll RF_RaidAccessFlags_t flags,
165 1.11.2.1 skrll RF_AllocListElem_t *allocList,
166 1.11.2.1 skrll int nfaults, int (*redFunc) (RF_DagNode_t *),
167 1.11.2.1 skrll int allowBufferRecycle)
168 1.1 oster {
169 1.11.2.1 skrll RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
170 1.3 oster RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
171 1.3 oster int nWndNodes, nRodNodes, i, nodeNum, asmNum;
172 1.3 oster RF_AccessStripeMapHeader_t *new_asm_h[2];
173 1.3 oster RF_StripeNum_t parityStripeID;
174 1.3 oster char *sosBuffer, *eosBuffer;
175 1.3 oster RF_ReconUnitNum_t which_ru;
176 1.3 oster RF_RaidLayout_t *layoutPtr;
177 1.3 oster RF_PhysDiskAddr_t *pda;
178 1.3 oster
179 1.3 oster layoutPtr = &(raidPtr->Layout);
180 1.11.2.5 skrll parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
181 1.11.2.1 skrll asmap->raidAddress,
182 1.11.2.1 skrll &which_ru);
183 1.3 oster
184 1.11.2.1 skrll #if RF_DEBUG_DAG
185 1.3 oster if (rf_dagDebug) {
186 1.3 oster printf("[Creating large-write DAG]\n");
187 1.3 oster }
188 1.11.2.1 skrll #endif
189 1.3 oster dag_h->creator = "LargeWriteDAG";
190 1.3 oster
191 1.3 oster dag_h->numCommitNodes = 1;
192 1.3 oster dag_h->numCommits = 0;
193 1.3 oster dag_h->numSuccedents = 1;
194 1.3 oster
195 1.3 oster /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */
196 1.3 oster nWndNodes = asmap->numStripeUnitsAccessed;
197 1.11.2.1 skrll
198 1.11.2.1 skrll for (i = 0; i < nWndNodes; i++) {
199 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
200 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
201 1.11.2.1 skrll dag_h->nodes = tmpNode;
202 1.11.2.1 skrll }
203 1.11.2.1 skrll wndNodes = dag_h->nodes;
204 1.11.2.1 skrll
205 1.11.2.1 skrll xorNode = rf_AllocDAGNode();
206 1.11.2.1 skrll xorNode->list_next = dag_h->nodes;
207 1.11.2.1 skrll dag_h->nodes = xorNode;
208 1.11.2.1 skrll
209 1.11.2.1 skrll wnpNode = rf_AllocDAGNode();
210 1.11.2.1 skrll wnpNode->list_next = dag_h->nodes;
211 1.11.2.1 skrll dag_h->nodes = wnpNode;
212 1.11.2.1 skrll
213 1.11.2.1 skrll blockNode = rf_AllocDAGNode();
214 1.11.2.1 skrll blockNode->list_next = dag_h->nodes;
215 1.11.2.1 skrll dag_h->nodes = blockNode;
216 1.11.2.1 skrll
217 1.11.2.1 skrll commitNode = rf_AllocDAGNode();
218 1.11.2.1 skrll commitNode->list_next = dag_h->nodes;
219 1.11.2.1 skrll dag_h->nodes = commitNode;
220 1.11.2.1 skrll
221 1.11.2.1 skrll termNode = rf_AllocDAGNode();
222 1.11.2.1 skrll termNode->list_next = dag_h->nodes;
223 1.11.2.1 skrll dag_h->nodes = termNode;
224 1.11.2.1 skrll
225 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
226 1.3 oster if (nfaults == 2) {
227 1.11.2.1 skrll wnqNode = rf_AllocDAGNode();
228 1.3 oster } else {
229 1.11.2.1 skrll #endif
230 1.3 oster wnqNode = NULL;
231 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
232 1.3 oster }
233 1.11.2.1 skrll #endif
234 1.11.2.5 skrll rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
235 1.11.2.5 skrll new_asm_h, &nRodNodes, &sosBuffer,
236 1.11.2.1 skrll &eosBuffer, allocList);
237 1.3 oster if (nRodNodes > 0) {
238 1.11.2.1 skrll for (i = 0; i < nRodNodes; i++) {
239 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
240 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
241 1.11.2.1 skrll dag_h->nodes = tmpNode;
242 1.11.2.1 skrll }
243 1.11.2.1 skrll rodNodes = dag_h->nodes;
244 1.3 oster } else {
245 1.3 oster rodNodes = NULL;
246 1.3 oster }
247 1.3 oster
248 1.3 oster /* begin node initialization */
249 1.3 oster if (nRodNodes > 0) {
250 1.11.2.5 skrll rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
251 1.11.2.5 skrll rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
252 1.11.2.1 skrll dag_h, "Nil", allocList);
253 1.3 oster } else {
254 1.11.2.5 skrll rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
255 1.11.2.5 skrll rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
256 1.11.2.1 skrll dag_h, "Nil", allocList);
257 1.3 oster }
258 1.3 oster
259 1.11.2.5 skrll rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
260 1.11.2.5 skrll rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
261 1.11.2.1 skrll dag_h, "Cmt", allocList);
262 1.11.2.5 skrll rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
263 1.11.2.5 skrll rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
264 1.11.2.1 skrll dag_h, "Trm", allocList);
265 1.3 oster
266 1.3 oster /* initialize the Rod nodes */
267 1.11.2.1 skrll tmpNode = rodNodes;
268 1.3 oster for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
269 1.3 oster if (new_asm_h[asmNum]) {
270 1.3 oster pda = new_asm_h[asmNum]->stripeMap->physInfo;
271 1.3 oster while (pda) {
272 1.11.2.5 skrll rf_InitNode(tmpNode, rf_wait,
273 1.11.2.1 skrll RF_FALSE, rf_DiskReadFunc,
274 1.11.2.5 skrll rf_DiskReadUndoFunc,
275 1.11.2.5 skrll rf_GenericWakeupFunc,
276 1.11.2.1 skrll 1, 1, 4, 0, dag_h,
277 1.11.2.1 skrll "Rod", allocList);
278 1.11.2.1 skrll tmpNode->params[0].p = pda;
279 1.11.2.1 skrll tmpNode->params[1].p = pda->bufPtr;
280 1.11.2.1 skrll tmpNode->params[2].v = parityStripeID;
281 1.11.2.1 skrll tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
282 1.11.2.1 skrll which_ru);
283 1.3 oster nodeNum++;
284 1.3 oster pda = pda->next;
285 1.11.2.1 skrll tmpNode = tmpNode->list_next;
286 1.3 oster }
287 1.3 oster }
288 1.3 oster }
289 1.3 oster RF_ASSERT(nodeNum == nRodNodes);
290 1.3 oster
291 1.3 oster /* initialize the wnd nodes */
292 1.3 oster pda = asmap->physInfo;
293 1.11.2.1 skrll tmpNode = wndNodes;
294 1.3 oster for (i = 0; i < nWndNodes; i++) {
295 1.11.2.5 skrll rf_InitNode(tmpNode, rf_wait, RF_FALSE,
296 1.11.2.1 skrll rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
297 1.11.2.5 skrll rf_GenericWakeupFunc, 1, 1, 4, 0,
298 1.11.2.1 skrll dag_h, "Wnd", allocList);
299 1.3 oster RF_ASSERT(pda != NULL);
300 1.11.2.1 skrll tmpNode->params[0].p = pda;
301 1.11.2.1 skrll tmpNode->params[1].p = pda->bufPtr;
302 1.11.2.1 skrll tmpNode->params[2].v = parityStripeID;
303 1.11.2.1 skrll tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
304 1.3 oster pda = pda->next;
305 1.11.2.1 skrll tmpNode = tmpNode->list_next;
306 1.3 oster }
307 1.3 oster
308 1.3 oster /* initialize the redundancy node */
309 1.3 oster if (nRodNodes > 0) {
310 1.11.2.5 skrll rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
311 1.11.2.1 skrll rf_NullNodeUndoFunc, NULL, 1,
312 1.11.2.5 skrll nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
313 1.11.2.1 skrll nfaults, dag_h, "Xr ", allocList);
314 1.3 oster } else {
315 1.11.2.5 skrll rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
316 1.11.2.1 skrll rf_NullNodeUndoFunc, NULL, 1,
317 1.11.2.5 skrll 1, 2 * (nWndNodes + nRodNodes) + 1,
318 1.11.2.1 skrll nfaults, dag_h, "Xr ", allocList);
319 1.3 oster }
320 1.3 oster xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
321 1.11.2.1 skrll tmpNode = wndNodes;
322 1.3 oster for (i = 0; i < nWndNodes; i++) {
323 1.11.2.1 skrll /* pda */
324 1.11.2.1 skrll xorNode->params[2 * i + 0] = tmpNode->params[0];
325 1.11.2.5 skrll /* buf ptr */
326 1.11.2.1 skrll xorNode->params[2 * i + 1] = tmpNode->params[1];
327 1.11.2.1 skrll tmpNode = tmpNode->list_next;
328 1.3 oster }
329 1.11.2.1 skrll tmpNode = rodNodes;
330 1.3 oster for (i = 0; i < nRodNodes; i++) {
331 1.11.2.1 skrll /* pda */
332 1.11.2.1 skrll xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
333 1.11.2.1 skrll /* buf ptr */
334 1.11.2.1 skrll xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
335 1.11.2.1 skrll tmpNode = tmpNode->list_next;
336 1.3 oster }
337 1.3 oster /* xor node needs to get at RAID information */
338 1.3 oster xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
339 1.3 oster
340 1.3 oster /*
341 1.11.2.1 skrll * Look for an Rod node that reads a complete SU. If none,
342 1.11.2.1 skrll * alloc a buffer to receive the parity info. Note that we
343 1.11.2.1 skrll * can't use a new data buffer because it will not have gotten
344 1.11.2.1 skrll * written when the xor occurs. */
345 1.3 oster if (allowBufferRecycle) {
346 1.11.2.1 skrll tmpNode = rodNodes;
347 1.3 oster for (i = 0; i < nRodNodes; i++) {
348 1.11.2.1 skrll if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
349 1.3 oster break;
350 1.11.2.1 skrll tmpNode = tmpNode->list_next;
351 1.3 oster }
352 1.3 oster }
353 1.3 oster if ((!allowBufferRecycle) || (i == nRodNodes)) {
354 1.11.2.1 skrll xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
355 1.3 oster } else {
356 1.11.2.1 skrll /* this works because the only way we get here is if
357 1.11.2.1 skrll allowBufferRecycle is true and we went through the
358 1.11.2.1 skrll above for loop, and exited via the break before
359 1.11.2.1 skrll i==nRodNodes was true. That means tmpNode will
360 1.11.2.1 skrll still point to a valid node -- the one we want for
361 1.11.2.1 skrll here! */
362 1.11.2.1 skrll xorNode->results[0] = tmpNode->params[1].p;
363 1.3 oster }
364 1.3 oster
365 1.3 oster /* initialize the Wnp node */
366 1.11.2.5 skrll rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
367 1.11.2.5 skrll rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
368 1.11.2.1 skrll dag_h, "Wnp", allocList);
369 1.3 oster wnpNode->params[0].p = asmap->parityInfo;
370 1.3 oster wnpNode->params[1].p = xorNode->results[0];
371 1.3 oster wnpNode->params[2].v = parityStripeID;
372 1.11.2.1 skrll wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
373 1.3 oster /* parityInfo must describe entire parity unit */
374 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
375 1.3 oster
376 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
377 1.3 oster if (nfaults == 2) {
378 1.3 oster /*
379 1.3 oster * We never try to recycle a buffer for the Q calcuation
380 1.3 oster * in addition to the parity. This would cause two buffers
381 1.3 oster * to get smashed during the P and Q calculation, guaranteeing
382 1.3 oster * one would be wrong.
383 1.3 oster */
384 1.11.2.1 skrll RF_MallocAndAdd(xorNode->results[1],
385 1.11.2.1 skrll rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
386 1.11.2.1 skrll (void *), allocList);
387 1.11.2.5 skrll rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
388 1.11.2.5 skrll rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
389 1.11.2.1 skrll 1, 1, 4, 0, dag_h, "Wnq", allocList);
390 1.3 oster wnqNode->params[0].p = asmap->qInfo;
391 1.3 oster wnqNode->params[1].p = xorNode->results[1];
392 1.3 oster wnqNode->params[2].v = parityStripeID;
393 1.11.2.1 skrll wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
394 1.3 oster /* parityInfo must describe entire parity unit */
395 1.3 oster RF_ASSERT(asmap->parityInfo->next == NULL);
396 1.3 oster }
397 1.11.2.1 skrll #endif
398 1.3 oster /*
399 1.3 oster * Connect nodes to form graph.
400 1.3 oster */
401 1.3 oster
402 1.3 oster /* connect dag header to block node */
403 1.3 oster RF_ASSERT(blockNode->numAntecedents == 0);
404 1.3 oster dag_h->succedents[0] = blockNode;
405 1.3 oster
406 1.3 oster if (nRodNodes > 0) {
407 1.3 oster /* connect the block node to the Rod nodes */
408 1.3 oster RF_ASSERT(blockNode->numSuccedents == nRodNodes);
409 1.3 oster RF_ASSERT(xorNode->numAntecedents == nRodNodes);
410 1.11.2.1 skrll tmpNode = rodNodes;
411 1.3 oster for (i = 0; i < nRodNodes; i++) {
412 1.11.2.2 skrll RF_ASSERT(tmpNode->numAntecedents == 1);
413 1.11.2.1 skrll blockNode->succedents[i] = tmpNode;
414 1.11.2.1 skrll tmpNode->antecedents[0] = blockNode;
415 1.11.2.1 skrll tmpNode->antType[0] = rf_control;
416 1.3 oster
417 1.3 oster /* connect the Rod nodes to the Xor node */
418 1.11.2.2 skrll RF_ASSERT(tmpNode->numSuccedents == 1);
419 1.11.2.1 skrll tmpNode->succedents[0] = xorNode;
420 1.11.2.1 skrll xorNode->antecedents[i] = tmpNode;
421 1.3 oster xorNode->antType[i] = rf_trueData;
422 1.11.2.1 skrll tmpNode = tmpNode->list_next;
423 1.3 oster }
424 1.3 oster } else {
425 1.3 oster /* connect the block node to the Xor node */
426 1.3 oster RF_ASSERT(blockNode->numSuccedents == 1);
427 1.3 oster RF_ASSERT(xorNode->numAntecedents == 1);
428 1.3 oster blockNode->succedents[0] = xorNode;
429 1.3 oster xorNode->antecedents[0] = blockNode;
430 1.3 oster xorNode->antType[0] = rf_control;
431 1.3 oster }
432 1.3 oster
433 1.3 oster /* connect the xor node to the commit node */
434 1.3 oster RF_ASSERT(xorNode->numSuccedents == 1);
435 1.3 oster RF_ASSERT(commitNode->numAntecedents == 1);
436 1.3 oster xorNode->succedents[0] = commitNode;
437 1.3 oster commitNode->antecedents[0] = xorNode;
438 1.3 oster commitNode->antType[0] = rf_control;
439 1.3 oster
440 1.3 oster /* connect the commit node to the write nodes */
441 1.3 oster RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
442 1.11.2.1 skrll tmpNode = wndNodes;
443 1.3 oster for (i = 0; i < nWndNodes; i++) {
444 1.3 oster RF_ASSERT(wndNodes->numAntecedents == 1);
445 1.11.2.1 skrll commitNode->succedents[i] = tmpNode;
446 1.11.2.1 skrll tmpNode->antecedents[0] = commitNode;
447 1.11.2.1 skrll tmpNode->antType[0] = rf_control;
448 1.11.2.1 skrll tmpNode = tmpNode->list_next;
449 1.3 oster }
450 1.3 oster RF_ASSERT(wnpNode->numAntecedents == 1);
451 1.3 oster commitNode->succedents[nWndNodes] = wnpNode;
452 1.3 oster wnpNode->antecedents[0] = commitNode;
453 1.3 oster wnpNode->antType[0] = rf_trueData;
454 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
455 1.3 oster if (nfaults == 2) {
456 1.3 oster RF_ASSERT(wnqNode->numAntecedents == 1);
457 1.3 oster commitNode->succedents[nWndNodes + 1] = wnqNode;
458 1.3 oster wnqNode->antecedents[0] = commitNode;
459 1.3 oster wnqNode->antType[0] = rf_trueData;
460 1.3 oster }
461 1.11.2.1 skrll #endif
462 1.3 oster /* connect the write nodes to the term node */
463 1.3 oster RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
464 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
465 1.11.2.1 skrll tmpNode = wndNodes;
466 1.3 oster for (i = 0; i < nWndNodes; i++) {
467 1.3 oster RF_ASSERT(wndNodes->numSuccedents == 1);
468 1.11.2.1 skrll tmpNode->succedents[0] = termNode;
469 1.11.2.1 skrll termNode->antecedents[i] = tmpNode;
470 1.3 oster termNode->antType[i] = rf_control;
471 1.11.2.1 skrll tmpNode = tmpNode->list_next;
472 1.3 oster }
473 1.3 oster RF_ASSERT(wnpNode->numSuccedents == 1);
474 1.3 oster wnpNode->succedents[0] = termNode;
475 1.3 oster termNode->antecedents[nWndNodes] = wnpNode;
476 1.3 oster termNode->antType[nWndNodes] = rf_control;
477 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
478 1.3 oster if (nfaults == 2) {
479 1.3 oster RF_ASSERT(wnqNode->numSuccedents == 1);
480 1.3 oster wnqNode->succedents[0] = termNode;
481 1.3 oster termNode->antecedents[nWndNodes + 1] = wnqNode;
482 1.3 oster termNode->antType[nWndNodes + 1] = rf_control;
483 1.3 oster }
484 1.11.2.1 skrll #endif
485 1.1 oster }
486 1.1 oster /******************************************************************************
487 1.1 oster *
488 1.1 oster * creates a DAG to perform a small-write operation (either raid 5 or pq),
489 1.1 oster * which is as follows:
490 1.1 oster *
491 1.1 oster * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
492 1.1 oster * \- Rod X / \----> Wnd [Und]-/
493 1.1 oster * [\- Rod X / \---> Wnd [Und]-/]
494 1.1 oster * [\- Roq -> Q / \--> Wnq [Unq]-/]
495 1.1 oster *
496 1.1 oster * Rop = read old parity
497 1.1 oster * Rod = read old data
498 1.1 oster * Roq = read old "q"
499 1.1 oster * Cmt = commit node
500 1.1 oster * Und = unlock data disk
501 1.1 oster * Unp = unlock parity disk
502 1.1 oster * Unq = unlock q disk
503 1.1 oster * Wnp = write new parity
504 1.1 oster * Wnd = write new data
505 1.1 oster * Wnq = write new "q"
506 1.1 oster * [ ] denotes optional segments in the graph
507 1.1 oster *
508 1.1 oster * Parameters: raidPtr - description of the physical array
509 1.1 oster * asmap - logical & physical addresses for this access
510 1.1 oster * bp - buffer ptr (holds write data)
511 1.3 oster * flags - general flags (e.g. disk locking)
512 1.1 oster * allocList - list of memory allocated in DAG creation
513 1.1 oster * pfuncs - list of parity generating functions
514 1.1 oster * qfuncs - list of q generating functions
515 1.1 oster *
516 1.1 oster * A null qfuncs indicates single fault tolerant
517 1.1 oster *****************************************************************************/
518 1.1 oster
519 1.11.2.5 skrll void
520 1.11.2.1 skrll rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
521 1.11.2.1 skrll RF_DagHeader_t *dag_h, void *bp,
522 1.11.2.1 skrll RF_RaidAccessFlags_t flags,
523 1.11.2.1 skrll RF_AllocListElem_t *allocList,
524 1.11.2.1 skrll const RF_RedFuncs_t *pfuncs,
525 1.11.2.1 skrll const RF_RedFuncs_t *qfuncs)
526 1.1 oster {
527 1.3 oster RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
528 1.11.2.1 skrll RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
529 1.11.2.1 skrll RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
530 1.3 oster RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
531 1.11.2.1 skrll RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
532 1.11.2.1 skrll RF_DagNode_t *tmpwriteParityNode;
533 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
534 1.11.2.1 skrll RF_DagNode_t *tmpwriteQNode;
535 1.11.2.1 skrll #endif
536 1.11.2.1 skrll int i, j, nNodes, totalNumNodes;
537 1.3 oster RF_ReconUnitNum_t which_ru;
538 1.3 oster int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
539 1.3 oster int (*qfunc) (RF_DagNode_t *);
540 1.3 oster int numDataNodes, numParityNodes;
541 1.3 oster RF_StripeNum_t parityStripeID;
542 1.3 oster RF_PhysDiskAddr_t *pda;
543 1.11.2.6 skrll const char *name, *qname;
544 1.3 oster long nfaults;
545 1.3 oster
546 1.3 oster nfaults = qfuncs ? 2 : 1;
547 1.3 oster
548 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
549 1.3 oster asmap->raidAddress, &which_ru);
550 1.3 oster pda = asmap->physInfo;
551 1.3 oster numDataNodes = asmap->numStripeUnitsAccessed;
552 1.3 oster numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
553 1.3 oster
554 1.11.2.1 skrll #if RF_DEBUG_DAG
555 1.3 oster if (rf_dagDebug) {
556 1.3 oster printf("[Creating small-write DAG]\n");
557 1.3 oster }
558 1.11.2.1 skrll #endif
559 1.3 oster RF_ASSERT(numDataNodes > 0);
560 1.3 oster dag_h->creator = "SmallWriteDAG";
561 1.3 oster
562 1.3 oster dag_h->numCommitNodes = 1;
563 1.3 oster dag_h->numCommits = 0;
564 1.3 oster dag_h->numSuccedents = 1;
565 1.3 oster
566 1.3 oster /*
567 1.3 oster * DAG creation occurs in four steps:
568 1.3 oster * 1. count the number of nodes in the DAG
569 1.3 oster * 2. create the nodes
570 1.3 oster * 3. initialize the nodes
571 1.3 oster * 4. connect the nodes
572 1.3 oster */
573 1.3 oster
574 1.3 oster /*
575 1.3 oster * Step 1. compute number of nodes in the graph
576 1.3 oster */
577 1.3 oster
578 1.11.2.1 skrll /* number of nodes: a read and write for each data unit a
579 1.11.2.1 skrll * redundancy computation node for each parity node (nfaults *
580 1.11.2.1 skrll * nparity) a read and write for each parity unit a block and
581 1.11.2.1 skrll * commit node (2) a terminate node if atomic RMW an unlock
582 1.11.2.1 skrll * node for each data unit, redundancy unit */
583 1.3 oster totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
584 1.3 oster + (nfaults * 2 * numParityNodes) + 3;
585 1.3 oster /*
586 1.3 oster * Step 2. create the nodes
587 1.3 oster */
588 1.11.2.1 skrll
589 1.11.2.1 skrll blockNode = rf_AllocDAGNode();
590 1.11.2.1 skrll blockNode->list_next = dag_h->nodes;
591 1.11.2.1 skrll dag_h->nodes = blockNode;
592 1.11.2.1 skrll
593 1.11.2.1 skrll commitNode = rf_AllocDAGNode();
594 1.11.2.1 skrll commitNode->list_next = dag_h->nodes;
595 1.11.2.1 skrll dag_h->nodes = commitNode;
596 1.11.2.1 skrll
597 1.11.2.1 skrll for (i = 0; i < numDataNodes; i++) {
598 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
599 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
600 1.11.2.1 skrll dag_h->nodes = tmpNode;
601 1.11.2.1 skrll }
602 1.11.2.1 skrll readDataNodes = dag_h->nodes;
603 1.11.2.1 skrll
604 1.11.2.1 skrll for (i = 0; i < numParityNodes; i++) {
605 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
606 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
607 1.11.2.1 skrll dag_h->nodes = tmpNode;
608 1.11.2.1 skrll }
609 1.11.2.1 skrll readParityNodes = dag_h->nodes;
610 1.11.2.5 skrll
611 1.11.2.1 skrll for (i = 0; i < numDataNodes; i++) {
612 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
613 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
614 1.11.2.1 skrll dag_h->nodes = tmpNode;
615 1.11.2.1 skrll }
616 1.11.2.1 skrll writeDataNodes = dag_h->nodes;
617 1.11.2.1 skrll
618 1.11.2.1 skrll for (i = 0; i < numParityNodes; i++) {
619 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
620 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
621 1.11.2.1 skrll dag_h->nodes = tmpNode;
622 1.11.2.1 skrll }
623 1.11.2.1 skrll writeParityNodes = dag_h->nodes;
624 1.11.2.1 skrll
625 1.11.2.1 skrll for (i = 0; i < numParityNodes; i++) {
626 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
627 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
628 1.11.2.1 skrll dag_h->nodes = tmpNode;
629 1.3 oster }
630 1.11.2.1 skrll xorNodes = dag_h->nodes;
631 1.11.2.1 skrll
632 1.11.2.1 skrll termNode = rf_AllocDAGNode();
633 1.11.2.1 skrll termNode->list_next = dag_h->nodes;
634 1.11.2.1 skrll dag_h->nodes = termNode;
635 1.11.2.1 skrll
636 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
637 1.3 oster if (nfaults == 2) {
638 1.11.2.1 skrll for (i = 0; i < numParityNodes; i++) {
639 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
640 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
641 1.11.2.1 skrll dag_h->nodes = tmpNode;
642 1.3 oster }
643 1.11.2.1 skrll readQNodes = dag_h->nodes;
644 1.11.2.1 skrll
645 1.11.2.1 skrll for (i = 0; i < numParityNodes; i++) {
646 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
647 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
648 1.11.2.1 skrll dag_h->nodes = tmpNode;
649 1.11.2.1 skrll }
650 1.11.2.1 skrll writeQNodes = dag_h->nodes;
651 1.11.2.1 skrll
652 1.11.2.1 skrll for (i = 0; i < numParityNodes; i++) {
653 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
654 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
655 1.11.2.1 skrll dag_h->nodes = tmpNode;
656 1.11.2.1 skrll }
657 1.11.2.1 skrll qNodes = dag_h->nodes;
658 1.3 oster } else {
659 1.11.2.1 skrll #endif
660 1.11.2.1 skrll readQNodes = writeQNodes = qNodes = NULL;
661 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
662 1.3 oster }
663 1.11.2.1 skrll #endif
664 1.3 oster
665 1.3 oster /*
666 1.3 oster * Step 3. initialize the nodes
667 1.3 oster */
668 1.3 oster /* initialize block node (Nil) */
669 1.3 oster nNodes = numDataNodes + (nfaults * numParityNodes);
670 1.11.2.5 skrll rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
671 1.11.2.5 skrll rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
672 1.11.2.1 skrll dag_h, "Nil", allocList);
673 1.3 oster
674 1.3 oster /* initialize commit node (Cmt) */
675 1.11.2.5 skrll rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
676 1.11.2.5 skrll rf_NullNodeUndoFunc, NULL, nNodes,
677 1.11.2.1 skrll (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
678 1.3 oster
679 1.3 oster /* initialize terminate node (Trm) */
680 1.11.2.5 skrll rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
681 1.11.2.5 skrll rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
682 1.11.2.1 skrll dag_h, "Trm", allocList);
683 1.3 oster
684 1.3 oster /* initialize nodes which read old data (Rod) */
685 1.11.2.1 skrll tmpreadDataNode = readDataNodes;
686 1.3 oster for (i = 0; i < numDataNodes; i++) {
687 1.11.2.5 skrll rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
688 1.11.2.1 skrll rf_DiskReadFunc, rf_DiskReadUndoFunc,
689 1.11.2.5 skrll rf_GenericWakeupFunc, (nfaults * numParityNodes),
690 1.11.2.1 skrll 1, 4, 0, dag_h, "Rod", allocList);
691 1.3 oster RF_ASSERT(pda != NULL);
692 1.3 oster /* physical disk addr desc */
693 1.11.2.1 skrll tmpreadDataNode->params[0].p = pda;
694 1.3 oster /* buffer to hold old data */
695 1.11.2.1 skrll tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
696 1.11.2.1 skrll tmpreadDataNode->params[2].v = parityStripeID;
697 1.11.2.1 skrll tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
698 1.11.2.1 skrll which_ru);
699 1.3 oster pda = pda->next;
700 1.11.2.1 skrll for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
701 1.11.2.1 skrll tmpreadDataNode->propList[j] = NULL;
702 1.3 oster }
703 1.11.2.1 skrll tmpreadDataNode = tmpreadDataNode->list_next;
704 1.3 oster }
705 1.3 oster
706 1.3 oster /* initialize nodes which read old parity (Rop) */
707 1.3 oster pda = asmap->parityInfo;
708 1.3 oster i = 0;
709 1.11.2.1 skrll tmpreadParityNode = readParityNodes;
710 1.3 oster for (i = 0; i < numParityNodes; i++) {
711 1.3 oster RF_ASSERT(pda != NULL);
712 1.11.2.5 skrll rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
713 1.11.2.1 skrll rf_DiskReadFunc, rf_DiskReadUndoFunc,
714 1.11.2.5 skrll rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
715 1.11.2.1 skrll dag_h, "Rop", allocList);
716 1.11.2.1 skrll tmpreadParityNode->params[0].p = pda;
717 1.3 oster /* buffer to hold old parity */
718 1.11.2.1 skrll tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
719 1.11.2.1 skrll tmpreadParityNode->params[2].v = parityStripeID;
720 1.11.2.1 skrll tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
721 1.11.2.1 skrll which_ru);
722 1.3 oster pda = pda->next;
723 1.11.2.1 skrll for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
724 1.11.2.1 skrll tmpreadParityNode->propList[0] = NULL;
725 1.3 oster }
726 1.11.2.1 skrll tmpreadParityNode = tmpreadParityNode->list_next;
727 1.3 oster }
728 1.3 oster
729 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
730 1.3 oster /* initialize nodes which read old Q (Roq) */
731 1.3 oster if (nfaults == 2) {
732 1.3 oster pda = asmap->qInfo;
733 1.11.2.1 skrll tmpreadQNode = readQNodes;
734 1.3 oster for (i = 0; i < numParityNodes; i++) {
735 1.3 oster RF_ASSERT(pda != NULL);
736 1.11.2.5 skrll rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
737 1.11.2.1 skrll rf_DiskReadFunc, rf_DiskReadUndoFunc,
738 1.11.2.5 skrll rf_GenericWakeupFunc, numParityNodes,
739 1.11.2.1 skrll 1, 4, 0, dag_h, "Roq", allocList);
740 1.11.2.1 skrll tmpreadQNode->params[0].p = pda;
741 1.3 oster /* buffer to hold old Q */
742 1.11.2.1 skrll tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
743 1.11.2.1 skrll pda->numSector << raidPtr->logBytesPerSector);
744 1.11.2.1 skrll tmpreadQNode->params[2].v = parityStripeID;
745 1.11.2.1 skrll tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
746 1.11.2.1 skrll which_ru);
747 1.3 oster pda = pda->next;
748 1.11.2.1 skrll for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
749 1.11.2.1 skrll tmpreadQNode->propList[0] = NULL;
750 1.3 oster }
751 1.11.2.1 skrll tmpreadQNode = tmpreadQNode->list_next;
752 1.3 oster }
753 1.3 oster }
754 1.11.2.1 skrll #endif
755 1.3 oster /* initialize nodes which write new data (Wnd) */
756 1.3 oster pda = asmap->physInfo;
757 1.11.2.1 skrll tmpwriteDataNode = writeDataNodes;
758 1.3 oster for (i = 0; i < numDataNodes; i++) {
759 1.3 oster RF_ASSERT(pda != NULL);
760 1.11.2.5 skrll rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
761 1.11.2.5 skrll rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
762 1.11.2.1 skrll rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
763 1.11.2.1 skrll "Wnd", allocList);
764 1.3 oster /* physical disk addr desc */
765 1.11.2.1 skrll tmpwriteDataNode->params[0].p = pda;
766 1.3 oster /* buffer holding new data to be written */
767 1.11.2.1 skrll tmpwriteDataNode->params[1].p = pda->bufPtr;
768 1.11.2.1 skrll tmpwriteDataNode->params[2].v = parityStripeID;
769 1.11.2.1 skrll tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
770 1.11.2.1 skrll which_ru);
771 1.3 oster pda = pda->next;
772 1.11.2.1 skrll tmpwriteDataNode = tmpwriteDataNode->list_next;
773 1.3 oster }
774 1.3 oster
775 1.3 oster /*
776 1.3 oster * Initialize nodes which compute new parity and Q.
777 1.3 oster */
778 1.3 oster /*
779 1.3 oster * We use the simple XOR func in the double-XOR case, and when
780 1.11.2.1 skrll * we're accessing only a portion of one stripe unit. The
781 1.11.2.1 skrll * distinction between the two is that the regular XOR func
782 1.11.2.1 skrll * assumes that the targbuf is a full SU in size, and examines
783 1.11.2.1 skrll * the pda associated with the buffer to decide where within
784 1.11.2.1 skrll * the buffer to XOR the data, whereas the simple XOR func
785 1.11.2.1 skrll * just XORs the data into the start of the buffer. */
786 1.3 oster if ((numParityNodes == 2) || ((numDataNodes == 1)
787 1.11.2.5 skrll && (asmap->totalSectorsAccessed <
788 1.11.2.1 skrll raidPtr->Layout.sectorsPerStripeUnit))) {
789 1.3 oster func = pfuncs->simple;
790 1.3 oster undoFunc = rf_NullNodeUndoFunc;
791 1.3 oster name = pfuncs->SimpleName;
792 1.3 oster if (qfuncs) {
793 1.3 oster qfunc = qfuncs->simple;
794 1.3 oster qname = qfuncs->SimpleName;
795 1.3 oster } else {
796 1.3 oster qfunc = NULL;
797 1.3 oster qname = NULL;
798 1.3 oster }
799 1.3 oster } else {
800 1.3 oster func = pfuncs->regular;
801 1.3 oster undoFunc = rf_NullNodeUndoFunc;
802 1.3 oster name = pfuncs->RegularName;
803 1.3 oster if (qfuncs) {
804 1.3 oster qfunc = qfuncs->regular;
805 1.3 oster qname = qfuncs->RegularName;
806 1.3 oster } else {
807 1.3 oster qfunc = NULL;
808 1.3 oster qname = NULL;
809 1.3 oster }
810 1.3 oster }
811 1.3 oster /*
812 1.3 oster * Initialize the xor nodes: params are {pda,buf}
813 1.3 oster * from {Rod,Wnd,Rop} nodes, and raidPtr
814 1.3 oster */
815 1.3 oster if (numParityNodes == 2) {
816 1.3 oster /* double-xor case */
817 1.11.2.1 skrll tmpxorNode = xorNodes;
818 1.11.2.1 skrll tmpreadDataNode = readDataNodes;
819 1.11.2.1 skrll tmpreadParityNode = readParityNodes;
820 1.11.2.1 skrll tmpwriteDataNode = writeDataNodes;
821 1.11.2.1 skrll tmpqNode = qNodes;
822 1.11.2.1 skrll tmpreadQNode = readQNodes;
823 1.3 oster for (i = 0; i < numParityNodes; i++) {
824 1.3 oster /* note: no wakeup func for xor */
825 1.11.2.5 skrll rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
826 1.11.2.5 skrll undoFunc, NULL, 1,
827 1.11.2.5 skrll (numDataNodes + numParityNodes),
828 1.11.2.1 skrll 7, 1, dag_h, name, allocList);
829 1.11.2.1 skrll tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
830 1.11.2.1 skrll tmpxorNode->params[0] = tmpreadDataNode->params[0];
831 1.11.2.1 skrll tmpxorNode->params[1] = tmpreadDataNode->params[1];
832 1.11.2.1 skrll tmpxorNode->params[2] = tmpreadParityNode->params[0];
833 1.11.2.1 skrll tmpxorNode->params[3] = tmpreadParityNode->params[1];
834 1.11.2.1 skrll tmpxorNode->params[4] = tmpwriteDataNode->params[0];
835 1.11.2.1 skrll tmpxorNode->params[5] = tmpwriteDataNode->params[1];
836 1.11.2.1 skrll tmpxorNode->params[6].p = raidPtr;
837 1.3 oster /* use old parity buf as target buf */
838 1.11.2.1 skrll tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
839 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
840 1.3 oster if (nfaults == 2) {
841 1.3 oster /* note: no wakeup func for qor */
842 1.11.2.5 skrll rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
843 1.11.2.1 skrll qfunc, undoFunc, NULL, 1,
844 1.11.2.5 skrll (numDataNodes + numParityNodes),
845 1.11.2.1 skrll 7, 1, dag_h, qname, allocList);
846 1.11.2.1 skrll tmpqNode->params[0] = tmpreadDataNode->params[0];
847 1.11.2.1 skrll tmpqNode->params[1] = tmpreadDataNode->params[1];
848 1.11.2.1 skrll tmpqNode->params[2] = tmpreadQNode->.params[0];
849 1.11.2.1 skrll tmpqNode->params[3] = tmpreadQNode->params[1];
850 1.11.2.1 skrll tmpqNode->params[4] = tmpwriteDataNode->params[0];
851 1.11.2.1 skrll tmpqNode->params[5] = tmpwriteDataNode->params[1];
852 1.11.2.1 skrll tmpqNode->params[6].p = raidPtr;
853 1.3 oster /* use old Q buf as target buf */
854 1.11.2.1 skrll tmpqNode->results[0] = tmpreadQNode->params[1].p;
855 1.11.2.1 skrll tmpqNode = tmpqNode->list_next;
856 1.11.2.1 skrll tmpreadQNodes = tmpreadQNodes->list_next;
857 1.3 oster }
858 1.11.2.1 skrll #endif
859 1.11.2.1 skrll tmpxorNode = tmpxorNode->list_next;
860 1.11.2.1 skrll tmpreadDataNode = tmpreadDataNode->list_next;
861 1.11.2.1 skrll tmpreadParityNode = tmpreadParityNode->list_next;
862 1.11.2.1 skrll tmpwriteDataNode = tmpwriteDataNode->list_next;
863 1.3 oster }
864 1.3 oster } else {
865 1.3 oster /* there is only one xor node in this case */
866 1.11.2.5 skrll rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
867 1.11.2.1 skrll undoFunc, NULL, 1, (numDataNodes + numParityNodes),
868 1.11.2.5 skrll (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
869 1.11.2.1 skrll dag_h, name, allocList);
870 1.11.2.1 skrll xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
871 1.11.2.1 skrll tmpreadDataNode = readDataNodes;
872 1.11.2.5 skrll for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
873 1.11.2.1 skrll out the "+1" into the "deal with Rop separately below */
874 1.11.2.1 skrll /* set up params related to Rod nodes */
875 1.11.2.1 skrll xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
876 1.11.2.1 skrll xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
877 1.11.2.1 skrll tmpreadDataNode = tmpreadDataNode->list_next;
878 1.11.2.1 skrll }
879 1.11.2.1 skrll /* deal with Rop separately */
880 1.11.2.1 skrll xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0]; /* pda */
881 1.11.2.1 skrll xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1]; /* buffer ptr */
882 1.11.2.1 skrll
883 1.11.2.1 skrll tmpwriteDataNode = writeDataNodes;
884 1.3 oster for (i = 0; i < numDataNodes; i++) {
885 1.3 oster /* set up params related to Wnd and Wnp nodes */
886 1.11.2.1 skrll xorNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
887 1.11.2.1 skrll tmpwriteDataNode->params[0];
888 1.11.2.1 skrll xorNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
889 1.11.2.1 skrll tmpwriteDataNode->params[1];
890 1.11.2.1 skrll tmpwriteDataNode = tmpwriteDataNode->list_next;
891 1.3 oster }
892 1.3 oster /* xor node needs to get at RAID information */
893 1.11.2.1 skrll xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
894 1.11.2.1 skrll xorNodes->results[0] = readParityNodes->params[1].p;
895 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
896 1.3 oster if (nfaults == 2) {
897 1.11.2.5 skrll rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
898 1.11.2.1 skrll undoFunc, NULL, 1,
899 1.11.2.1 skrll (numDataNodes + numParityNodes),
900 1.11.2.1 skrll (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
901 1.11.2.1 skrll dag_h, qname, allocList);
902 1.11.2.1 skrll tmpreadDataNode = readDataNodes;
903 1.3 oster for (i = 0; i < numDataNodes; i++) {
904 1.3 oster /* set up params related to Rod */
905 1.11.2.1 skrll qNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
906 1.11.2.1 skrll qNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
907 1.11.2.1 skrll tmpreadDataNode = tmpreadDataNode->list_next;
908 1.3 oster }
909 1.3 oster /* and read old q */
910 1.11.2.1 skrll qNodes->params[2 * numDataNodes + 0] = /* pda */
911 1.11.2.1 skrll readQNodes->params[0];
912 1.11.2.1 skrll qNodes->params[2 * numDataNodes + 1] = /* buffer ptr */
913 1.11.2.1 skrll readQNodes->params[1];
914 1.11.2.1 skrll tmpwriteDataNode = writeDataNodes;
915 1.3 oster for (i = 0; i < numDataNodes; i++) {
916 1.3 oster /* set up params related to Wnd nodes */
917 1.11.2.1 skrll qNodes->params[2 * (numDataNodes + 1 + i) + 0] = /* pda */
918 1.11.2.1 skrll tmpwriteDataNode->params[0];
919 1.11.2.1 skrll qNodes->params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */
920 1.11.2.1 skrll tmpwriteDataNode->params[1];
921 1.11.2.1 skrll tmpwriteDataNode = tmpwriteDataNode->list_next;
922 1.3 oster }
923 1.3 oster /* xor node needs to get at RAID information */
924 1.11.2.1 skrll qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
925 1.11.2.1 skrll qNodes->results[0] = readQNodes->params[1].p;
926 1.3 oster }
927 1.11.2.1 skrll #endif
928 1.3 oster }
929 1.3 oster
930 1.3 oster /* initialize nodes which write new parity (Wnp) */
931 1.3 oster pda = asmap->parityInfo;
932 1.11.2.1 skrll tmpwriteParityNode = writeParityNodes;
933 1.11.2.1 skrll tmpxorNode = xorNodes;
934 1.3 oster for (i = 0; i < numParityNodes; i++) {
935 1.11.2.5 skrll rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
936 1.11.2.1 skrll rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
937 1.11.2.1 skrll rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
938 1.11.2.1 skrll "Wnp", allocList);
939 1.3 oster RF_ASSERT(pda != NULL);
940 1.11.2.1 skrll tmpwriteParityNode->params[0].p = pda; /* param 1 (bufPtr)
941 1.11.2.1 skrll * filled in by xor node */
942 1.11.2.1 skrll tmpwriteParityNode->params[1].p = tmpxorNode->results[0]; /* buffer pointer for
943 1.11.2.1 skrll * parity write
944 1.11.2.1 skrll * operation */
945 1.11.2.1 skrll tmpwriteParityNode->params[2].v = parityStripeID;
946 1.11.2.1 skrll tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
947 1.11.2.1 skrll which_ru);
948 1.3 oster pda = pda->next;
949 1.11.2.1 skrll tmpwriteParityNode = tmpwriteParityNode->list_next;
950 1.11.2.1 skrll tmpxorNode = tmpxorNode->list_next;
951 1.3 oster }
952 1.3 oster
953 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
954 1.3 oster /* initialize nodes which write new Q (Wnq) */
955 1.3 oster if (nfaults == 2) {
956 1.3 oster pda = asmap->qInfo;
957 1.11.2.1 skrll tmpwriteQNode = writeQNodes;
958 1.11.2.1 skrll tmpqNode = qNodes;
959 1.3 oster for (i = 0; i < numParityNodes; i++) {
960 1.11.2.5 skrll rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
961 1.11.2.5 skrll rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
962 1.11.2.1 skrll rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
963 1.11.2.1 skrll "Wnq", allocList);
964 1.3 oster RF_ASSERT(pda != NULL);
965 1.11.2.1 skrll tmpwriteQNode->params[0].p = pda; /* param 1 (bufPtr)
966 1.3 oster * filled in by xor node */
967 1.11.2.1 skrll tmpwriteQNode->params[1].p = tmpqNode->results[0]; /* buffer pointer for
968 1.3 oster * parity write
969 1.3 oster * operation */
970 1.11.2.1 skrll tmpwriteQNode->params[2].v = parityStripeID;
971 1.11.2.1 skrll tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
972 1.11.2.1 skrll which_ru);
973 1.3 oster pda = pda->next;
974 1.11.2.1 skrll tmpwriteQNode = tmpwriteQNode->list_next;
975 1.11.2.1 skrll tmpqNode = tmpqNode->list_next;
976 1.3 oster }
977 1.3 oster }
978 1.11.2.1 skrll #endif
979 1.3 oster /*
980 1.3 oster * Step 4. connect the nodes.
981 1.3 oster */
982 1.3 oster
983 1.3 oster /* connect header to block node */
984 1.3 oster dag_h->succedents[0] = blockNode;
985 1.3 oster
986 1.3 oster /* connect block node to read old data nodes */
987 1.3 oster RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
988 1.11.2.1 skrll tmpreadDataNode = readDataNodes;
989 1.3 oster for (i = 0; i < numDataNodes; i++) {
990 1.11.2.1 skrll blockNode->succedents[i] = tmpreadDataNode;
991 1.11.2.1 skrll RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
992 1.11.2.1 skrll tmpreadDataNode->antecedents[0] = blockNode;
993 1.11.2.1 skrll tmpreadDataNode->antType[0] = rf_control;
994 1.11.2.1 skrll tmpreadDataNode = tmpreadDataNode->list_next;
995 1.3 oster }
996 1.3 oster
997 1.3 oster /* connect block node to read old parity nodes */
998 1.11.2.1 skrll tmpreadParityNode = readParityNodes;
999 1.3 oster for (i = 0; i < numParityNodes; i++) {
1000 1.11.2.1 skrll blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1001 1.11.2.1 skrll RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1002 1.11.2.1 skrll tmpreadParityNode->antecedents[0] = blockNode;
1003 1.11.2.1 skrll tmpreadParityNode->antType[0] = rf_control;
1004 1.11.2.1 skrll tmpreadParityNode = tmpreadParityNode->list_next;
1005 1.3 oster }
1006 1.3 oster
1007 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1008 1.3 oster /* connect block node to read old Q nodes */
1009 1.3 oster if (nfaults == 2) {
1010 1.11.2.1 skrll tmpreadQNode = readQNodes;
1011 1.3 oster for (i = 0; i < numParityNodes; i++) {
1012 1.11.2.1 skrll blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1013 1.11.2.1 skrll RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1014 1.11.2.1 skrll tmpreadQNode->antecedents[0] = blockNode;
1015 1.11.2.1 skrll tmpreadQNode->antType[0] = rf_control;
1016 1.11.2.1 skrll tmpreadQNode = tmpreadQNode->list_next;
1017 1.3 oster }
1018 1.3 oster }
1019 1.11.2.1 skrll #endif
1020 1.3 oster /* connect read old data nodes to xor nodes */
1021 1.11.2.1 skrll tmpreadDataNode = readDataNodes;
1022 1.3 oster for (i = 0; i < numDataNodes; i++) {
1023 1.11.2.1 skrll RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1024 1.11.2.1 skrll tmpxorNode = xorNodes;
1025 1.3 oster for (j = 0; j < numParityNodes; j++) {
1026 1.11.2.1 skrll RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1027 1.11.2.1 skrll tmpreadDataNode->succedents[j] = tmpxorNode;
1028 1.11.2.1 skrll tmpxorNode->antecedents[i] = tmpreadDataNode;
1029 1.11.2.1 skrll tmpxorNode->antType[i] = rf_trueData;
1030 1.11.2.1 skrll tmpxorNode = tmpxorNode->list_next;
1031 1.3 oster }
1032 1.11.2.1 skrll tmpreadDataNode = tmpreadDataNode->list_next;
1033 1.3 oster }
1034 1.3 oster
1035 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1036 1.3 oster /* connect read old data nodes to q nodes */
1037 1.3 oster if (nfaults == 2) {
1038 1.11.2.1 skrll tmpreadDataNode = readDataNodes;
1039 1.3 oster for (i = 0; i < numDataNodes; i++) {
1040 1.11.2.1 skrll tmpqNode = qNodes;
1041 1.3 oster for (j = 0; j < numParityNodes; j++) {
1042 1.11.2.1 skrll RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1043 1.11.2.1 skrll tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1044 1.11.2.1 skrll tmpqNode->antecedents[i] = tmpreadDataNode;
1045 1.11.2.1 skrll tmpqNode->antType[i] = rf_trueData;
1046 1.11.2.1 skrll tmpqNode = tmpqNode->list_next;
1047 1.3 oster }
1048 1.11.2.1 skrll tmpreadDataNode = tmpreadDataNode->list_next;
1049 1.3 oster }
1050 1.3 oster }
1051 1.11.2.1 skrll #endif
1052 1.3 oster /* connect read old parity nodes to xor nodes */
1053 1.11.2.1 skrll tmpreadParityNode = readParityNodes;
1054 1.3 oster for (i = 0; i < numParityNodes; i++) {
1055 1.11.2.1 skrll RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1056 1.11.2.1 skrll tmpxorNode = xorNodes;
1057 1.3 oster for (j = 0; j < numParityNodes; j++) {
1058 1.11.2.1 skrll tmpreadParityNode->succedents[j] = tmpxorNode;
1059 1.11.2.1 skrll tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1060 1.11.2.1 skrll tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1061 1.11.2.1 skrll tmpxorNode = tmpxorNode->list_next;
1062 1.3 oster }
1063 1.11.2.1 skrll tmpreadParityNode = tmpreadParityNode->list_next;
1064 1.3 oster }
1065 1.3 oster
1066 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1067 1.3 oster /* connect read old q nodes to q nodes */
1068 1.3 oster if (nfaults == 2) {
1069 1.11.2.1 skrll tmpreadParityNode = readParityNodes;
1070 1.11.2.1 skrll tmpreadQNode = readQNodes;
1071 1.3 oster for (i = 0; i < numParityNodes; i++) {
1072 1.11.2.1 skrll RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1073 1.11.2.1 skrll tmpqNode = qNodes;
1074 1.3 oster for (j = 0; j < numParityNodes; j++) {
1075 1.11.2.1 skrll tmpreadQNode->succedents[j] = tmpqNode;
1076 1.11.2.1 skrll tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1077 1.11.2.1 skrll tmpqNode->antType[numDataNodes + i] = rf_trueData;
1078 1.11.2.1 skrll tmpqNode = tmpqNode->list_next;
1079 1.3 oster }
1080 1.11.2.1 skrll tmpreadParityNode = tmpreadParityNode->list_next;
1081 1.11.2.1 skrll tmpreadQNode = tmpreadQNode->list_next;
1082 1.3 oster }
1083 1.3 oster }
1084 1.11.2.1 skrll #endif
1085 1.3 oster /* connect xor nodes to commit node */
1086 1.3 oster RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1087 1.11.2.1 skrll tmpxorNode = xorNodes;
1088 1.3 oster for (i = 0; i < numParityNodes; i++) {
1089 1.11.2.1 skrll RF_ASSERT(tmpxorNode->numSuccedents == 1);
1090 1.11.2.1 skrll tmpxorNode->succedents[0] = commitNode;
1091 1.11.2.1 skrll commitNode->antecedents[i] = tmpxorNode;
1092 1.3 oster commitNode->antType[i] = rf_control;
1093 1.11.2.1 skrll tmpxorNode = tmpxorNode->list_next;
1094 1.3 oster }
1095 1.3 oster
1096 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1097 1.3 oster /* connect q nodes to commit node */
1098 1.3 oster if (nfaults == 2) {
1099 1.11.2.1 skrll tmpqNode = qNodes;
1100 1.3 oster for (i = 0; i < numParityNodes; i++) {
1101 1.11.2.1 skrll RF_ASSERT(tmpqNode->numSuccedents == 1);
1102 1.11.2.1 skrll tmpqNode->succedents[0] = commitNode;
1103 1.11.2.1 skrll commitNode->antecedents[i + numParityNodes] = tmpqNode;
1104 1.3 oster commitNode->antType[i + numParityNodes] = rf_control;
1105 1.11.2.1 skrll tmpqNode = tmpqNode->list_next;
1106 1.3 oster }
1107 1.3 oster }
1108 1.11.2.1 skrll #endif
1109 1.3 oster /* connect commit node to write nodes */
1110 1.3 oster RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1111 1.11.2.1 skrll tmpwriteDataNode = writeDataNodes;
1112 1.3 oster for (i = 0; i < numDataNodes; i++) {
1113 1.11.2.2 skrll RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1114 1.11.2.1 skrll commitNode->succedents[i] = tmpwriteDataNode;
1115 1.11.2.1 skrll tmpwriteDataNode->antecedents[0] = commitNode;
1116 1.11.2.1 skrll tmpwriteDataNode->antType[0] = rf_trueData;
1117 1.11.2.1 skrll tmpwriteDataNode = tmpwriteDataNode->list_next;
1118 1.3 oster }
1119 1.11.2.1 skrll tmpwriteParityNode = writeParityNodes;
1120 1.3 oster for (i = 0; i < numParityNodes; i++) {
1121 1.11.2.1 skrll RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1122 1.11.2.1 skrll commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1123 1.11.2.1 skrll tmpwriteParityNode->antecedents[0] = commitNode;
1124 1.11.2.1 skrll tmpwriteParityNode->antType[0] = rf_trueData;
1125 1.11.2.1 skrll tmpwriteParityNode = tmpwriteParityNode->list_next;
1126 1.3 oster }
1127 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1128 1.3 oster if (nfaults == 2) {
1129 1.11.2.1 skrll tmpwriteQNode = writeQNodes;
1130 1.3 oster for (i = 0; i < numParityNodes; i++) {
1131 1.11.2.1 skrll RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1132 1.11.2.1 skrll commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1133 1.11.2.1 skrll tmpwriteQNode->antecedents[0] = commitNode;
1134 1.11.2.1 skrll tmpwriteQNode->antType[0] = rf_trueData;
1135 1.11.2.1 skrll tmpwriteQNode = tmpwriteQNode->list_next;
1136 1.3 oster }
1137 1.3 oster }
1138 1.11.2.1 skrll #endif
1139 1.3 oster RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1140 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1141 1.11.2.1 skrll tmpwriteDataNode = writeDataNodes;
1142 1.3 oster for (i = 0; i < numDataNodes; i++) {
1143 1.11.2.1 skrll /* connect write new data nodes to term node */
1144 1.11.2.1 skrll RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1145 1.11.2.1 skrll RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1146 1.11.2.1 skrll tmpwriteDataNode->succedents[0] = termNode;
1147 1.11.2.1 skrll termNode->antecedents[i] = tmpwriteDataNode;
1148 1.11.2.1 skrll termNode->antType[i] = rf_control;
1149 1.11.2.1 skrll tmpwriteDataNode = tmpwriteDataNode->list_next;
1150 1.3 oster }
1151 1.3 oster
1152 1.11.2.1 skrll tmpwriteParityNode = writeParityNodes;
1153 1.3 oster for (i = 0; i < numParityNodes; i++) {
1154 1.11.2.1 skrll RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1155 1.11.2.1 skrll tmpwriteParityNode->succedents[0] = termNode;
1156 1.11.2.1 skrll termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1157 1.11.2.1 skrll termNode->antType[numDataNodes + i] = rf_control;
1158 1.11.2.1 skrll tmpwriteParityNode = tmpwriteParityNode->list_next;
1159 1.3 oster }
1160 1.3 oster
1161 1.11.2.1 skrll #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1162 1.3 oster if (nfaults == 2) {
1163 1.11.2.1 skrll tmpwriteQNode = writeQNodes;
1164 1.3 oster for (i = 0; i < numParityNodes; i++) {
1165 1.11.2.1 skrll RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1166 1.11.2.1 skrll tmpwriteQNode->succedents[0] = termNode;
1167 1.11.2.1 skrll termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1168 1.11.2.1 skrll termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1169 1.11.2.1 skrll tmpwriteQNode = tmpwriteQNode->list_next;
1170 1.3 oster }
1171 1.3 oster }
1172 1.11.2.1 skrll #endif
1173 1.1 oster }
1174 1.1 oster
1175 1.1 oster
1176 1.1 oster /******************************************************************************
1177 1.1 oster * create a write graph (fault-free or degraded) for RAID level 1
1178 1.1 oster *
1179 1.1 oster * Hdr -> Commit -> Wpd -> Nil -> Trm
1180 1.1 oster * -> Wsd ->
1181 1.1 oster *
1182 1.1 oster * The "Wpd" node writes data to the primary copy in the mirror pair
1183 1.1 oster * The "Wsd" node writes data to the secondary copy in the mirror pair
 * The "Commit" node is created under the name "Cmt"; "Nil" is the unblock
 * node, which has every write node as an antecedent and the terminator as
 * its only succedent.
1184 1.1 oster *
1185 1.1 oster * Parameters: raidPtr - description of the physical array
1186 1.1 oster * asmap - logical & physical addresses for this access
 * dag_h - DAG header; every node allocated here is pushed onto
 * dag_h->nodes, the commit node becomes succedents[0], and the creator,
 * numCommitNodes, numCommits and numSuccedents fields are set
1187 1.1 oster * bp - buffer ptr (holds write data); not referenced in the body of
 * this function, the write buffers are taken from pda->bufPtr
1188 1.3 oster * flags - general flags (e.g. disk locking); not referenced in the body
1189 1.1 oster * allocList - list of memory allocated in DAG creation; handed to
 * every rf_InitNode() call
 *
 * Returns nothing; the result is the graph attached to dag_h.
1190 1.1 oster *****************************************************************************/
1191 1.1 oster
1192 1.11.2.5 skrll void
1193 1.11.2.1 skrll rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1194 1.11.2.1 skrll RF_DagHeader_t *dag_h, void *bp,
1195 1.11.2.1 skrll RF_RaidAccessFlags_t flags,
1196 1.11.2.1 skrll RF_AllocListElem_t *allocList)
1197 1.1 oster {
1198 1.3 oster RF_DagNode_t *unblockNode, *termNode, *commitNode;
/* wndNode / wmirNode are the heads of the Wpd and Wsd node runs inside dag_h->nodes */
1199 1.11.2.1 skrll RF_DagNode_t *wndNode, *wmirNode;
/* tmp* are cursors used to walk those runs through list_next */
1200 1.11.2.1 skrll RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1201 1.3 oster int nWndNodes, nWmirNodes, i;
1202 1.3 oster RF_ReconUnitNum_t which_ru;
/* pda walks asmap->physInfo, pdaP walks asmap->parityInfo */
1203 1.3 oster RF_PhysDiskAddr_t *pda, *pdaP;
1204 1.3 oster RF_StripeNum_t parityStripeID;
1205 1.3 oster
/* parityStripeID and which_ru (filled in through the pointer) end up in params[2] and params[3] of every write node */
1206 1.3 oster parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1207 1.3 oster asmap->raidAddress, &which_ru);
1208 1.11.2.1 skrll #if RF_DEBUG_DAG
1209 1.3 oster if (rf_dagDebug) {
1210 1.3 oster printf("[Creating RAID level 1 write DAG]\n");
1211 1.3 oster }
1212 1.11.2.1 skrll #endif
1213 1.3 oster dag_h->creator = "RaidOneWriteDAG";
1214 1.3 oster
/* each count is 2 when the corresponding pda list has a second entry, otherwise 1 */
1215 1.3 oster /* 2 implies access not SU aligned */
1216 1.3 oster nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1217 1.3 oster nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1218 1.3 oster
1219 1.3 oster /* one fewer Wnd node when numDataFailed is 1, one fewer Wmir node when numParityFailed is 1 */
1220 1.3 oster if (asmap->numDataFailed == 1)
1221 1.3 oster nWndNodes--;
1222 1.3 oster if (asmap->numParityFailed == 1)
1223 1.3 oster nWmirNodes--;
1224 1.3 oster
1225 1.3 oster /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1226 1.3 oster * + terminator) */
/* Every new node is pushed onto the front of dag_h->nodes. Capturing the
 * list head right after a run of pushes therefore yields a pointer whose
 * first <count> list_next hops are exactly the nodes of that run. When a
 * count is 0 the captured head is whatever was already on the list; it is
 * never walked because all later loops are bounded by the same count. */
1227 1.11.2.1 skrll for (i = 0; i < nWndNodes; i++) {
1228 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
1229 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
1230 1.11.2.1 skrll dag_h->nodes = tmpNode;
1231 1.11.2.1 skrll }
1232 1.11.2.1 skrll wndNode = dag_h->nodes;
1233 1.11.2.1 skrll
/* the Wmir run is pushed in front of the Wnd run, so its order must not be swapped with the loop above */
1234 1.11.2.1 skrll for (i = 0; i < nWmirNodes; i++) {
1235 1.11.2.1 skrll tmpNode = rf_AllocDAGNode();
1236 1.11.2.1 skrll tmpNode->list_next = dag_h->nodes;
1237 1.11.2.1 skrll dag_h->nodes = tmpNode;
1238 1.11.2.1 skrll }
1239 1.11.2.1 skrll wmirNode = dag_h->nodes;
1240 1.11.2.1 skrll
/* the three single nodes are pushed last, so they sit ahead of both runs on the list */
1241 1.11.2.1 skrll commitNode = rf_AllocDAGNode();
1242 1.11.2.1 skrll commitNode->list_next = dag_h->nodes;
1243 1.11.2.1 skrll dag_h->nodes = commitNode;
1244 1.11.2.1 skrll
1245 1.11.2.1 skrll unblockNode = rf_AllocDAGNode();
1246 1.11.2.1 skrll unblockNode->list_next = dag_h->nodes;
1247 1.11.2.1 skrll dag_h->nodes = unblockNode;
1248 1.11.2.1 skrll
1249 1.11.2.1 skrll termNode = rf_AllocDAGNode();
1250 1.11.2.1 skrll termNode->list_next = dag_h->nodes;
1251 1.11.2.1 skrll dag_h->nodes = termNode;
1252 1.3 oster
1253 1.3 oster /* this dag can commit immediately */
1254 1.3 oster dag_h->numCommitNodes = 1;
1255 1.3 oster dag_h->numCommits = 0;
1256 1.3 oster dag_h->numSuccedents = 1;
1257 1.3 oster
1258 1.3 oster /* initialize the commit, unblock, and term nodes */
/* The two counts after the wakeup-function argument are checked further down
 * by RF_ASSERT as numSuccedents and numAntecedents: commit has
 * (nWndNodes + nWmirNodes) succedents and no antecedents, unblock has one
 * succedent and (nWndNodes + nWmirNodes) antecedents, term has none and one.
 * NOTE(review): the third argument (RF_TRUE only for the commit node) and the
 * trailing "0, 0" are presumably the commit flag and the param/result
 * counts - confirm against rf_InitNode(). */
1259 1.11.2.5 skrll rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1260 1.11.2.5 skrll rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1261 1.11.2.1 skrll 0, 0, 0, dag_h, "Cmt", allocList);
1262 1.11.2.5 skrll rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1263 1.11.2.5 skrll rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1264 1.11.2.1 skrll 0, 0, dag_h, "Nil", allocList);
1265 1.11.2.5 skrll rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1266 1.11.2.5 skrll rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1267 1.11.2.1 skrll dag_h, "Trm", allocList);
1268 1.3 oster
1269 1.3 oster /* initialize the wnd nodes */
/* One "Wpd" disk-write node per entry of asmap->physInfo:
 * params[0] = physical disk address, params[1] = that address's bufPtr,
 * params[2] = parityStripeID, params[3] = normal priority packed with which_ru. */
1270 1.3 oster if (nWndNodes > 0) {
1271 1.3 oster pda = asmap->physInfo;
1272 1.11.2.1 skrll tmpwndNode = wndNode;
1273 1.3 oster for (i = 0; i < nWndNodes; i++) {
1274 1.11.2.5 skrll rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1275 1.11.2.1 skrll rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1276 1.11.2.5 skrll rf_GenericWakeupFunc, 1, 1, 4, 0,
1277 1.11.2.1 skrll dag_h, "Wpd", allocList);
1278 1.3 oster RF_ASSERT(pda != NULL);
1279 1.11.2.1 skrll tmpwndNode->params[0].p = pda;
1280 1.11.2.1 skrll tmpwndNode->params[1].p = pda->bufPtr;
1281 1.11.2.1 skrll tmpwndNode->params[2].v = parityStripeID;
1282 1.11.2.1 skrll tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1283 1.3 oster pda = pda->next;
1284 1.11.2.1 skrll tmpwndNode = tmpwndNode->list_next;
1285 1.3 oster }
/* the physInfo list must be exhausted after exactly nWndNodes entries.
 * NOTE(review): if nWndNodes was decremented above while physInfo still has
 * two entries, this assert would trip - confirm that combination cannot occur. */
1286 1.3 oster RF_ASSERT(pda == NULL);
1287 1.3 oster }
1288 1.3 oster /* initialize the mirror nodes */
/* One "Wsd" disk-write node per entry of asmap->parityInfo. The disk address
 * comes from the parityInfo list (pdaP) while the buffer comes from the
 * physInfo list (pda), so the two lists are walked in lock step. */
1289 1.3 oster if (nWmirNodes > 0) {
1290 1.3 oster pda = asmap->physInfo;
1291 1.3 oster pdaP = asmap->parityInfo;
1292 1.11.2.1 skrll tmpwmirNode = wmirNode;
1293 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1294 1.11.2.5 skrll rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1295 1.11.2.1 skrll rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1296 1.11.2.5 skrll rf_GenericWakeupFunc, 1, 1, 4, 0,
1297 1.11.2.1 skrll dag_h, "Wsd", allocList);
/* NOTE(review): only pda is asserted here although pdaP is dereferenced below as well */
1298 1.3 oster RF_ASSERT(pda != NULL);
1299 1.11.2.1 skrll tmpwmirNode->params[0].p = pdaP;
1300 1.11.2.1 skrll tmpwmirNode->params[1].p = pda->bufPtr;
1301 1.11.2.1 skrll tmpwmirNode->params[2].v = parityStripeID;
1302 1.11.2.1 skrll tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1303 1.3 oster pda = pda->next;
1304 1.3 oster pdaP = pdaP->next;
1305 1.11.2.1 skrll tmpwmirNode = tmpwmirNode->list_next;
1306 1.3 oster }
/* both lists must end after exactly nWmirNodes entries.
 * NOTE(review): these asserts require physInfo and parityInfo to have the
 * same length as the (possibly decremented) nWmirNodes - confirm. */
1307 1.3 oster RF_ASSERT(pda == NULL);
1308 1.3 oster RF_ASSERT(pdaP == NULL);
1309 1.3 oster }
1310 1.3 oster /* link the header node to the commit node */
1311 1.3 oster RF_ASSERT(dag_h->numSuccedents == 1);
1312 1.3 oster RF_ASSERT(commitNode->numAntecedents == 0);
1313 1.3 oster dag_h->succedents[0] = commitNode;
1314 1.3 oster
1315 1.3 oster /* link the commit node to the write nodes */
/* succedent slots [0, nWndNodes) hold the Wpd nodes and slots
 * [nWndNodes, nWndNodes + nWmirNodes) hold the Wsd nodes; every edge is rf_control */
1316 1.3 oster RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1317 1.11.2.1 skrll tmpwndNode = wndNode;
1318 1.3 oster for (i = 0; i < nWndNodes; i++) {
1319 1.11.2.1 skrll RF_ASSERT(tmpwndNode->numAntecedents == 1);
1320 1.11.2.1 skrll commitNode->succedents[i] = tmpwndNode;
1321 1.11.2.1 skrll tmpwndNode->antecedents[0] = commitNode;
1322 1.11.2.1 skrll tmpwndNode->antType[0] = rf_control;
1323 1.11.2.1 skrll tmpwndNode = tmpwndNode->list_next;
1324 1.3 oster }
1325 1.11.2.1 skrll tmpwmirNode = wmirNode;
1326 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1327 1.11.2.1 skrll RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1328 1.11.2.1 skrll commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1329 1.11.2.1 skrll tmpwmirNode->antecedents[0] = commitNode;
1330 1.11.2.1 skrll tmpwmirNode->antType[0] = rf_control;
1331 1.11.2.1 skrll tmpwmirNode = tmpwmirNode->list_next;
1332 1.3 oster }
1333 1.3 oster
1334 1.3 oster /* link the write nodes to the unblock node */
/* the unblock node's antecedent slots use the same layout as the commit
 * node's succedent slots: Wpd nodes first, then Wsd nodes */
1335 1.3 oster RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1336 1.11.2.1 skrll tmpwndNode = wndNode;
1337 1.3 oster for (i = 0; i < nWndNodes; i++) {
1338 1.11.2.1 skrll RF_ASSERT(tmpwndNode->numSuccedents == 1);
1339 1.11.2.1 skrll tmpwndNode->succedents[0] = unblockNode;
1340 1.11.2.1 skrll unblockNode->antecedents[i] = tmpwndNode;
1341 1.3 oster unblockNode->antType[i] = rf_control;
1342 1.11.2.1 skrll tmpwndNode = tmpwndNode->list_next;
1343 1.3 oster }
1344 1.11.2.1 skrll tmpwmirNode = wmirNode;
1345 1.3 oster for (i = 0; i < nWmirNodes; i++) {
1346 1.11.2.1 skrll RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1347 1.11.2.1 skrll tmpwmirNode->succedents[0] = unblockNode;
1348 1.11.2.1 skrll unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1349 1.3 oster unblockNode->antType[i + nWndNodes] = rf_control;
1350 1.11.2.1 skrll tmpwmirNode = tmpwmirNode->list_next;
1351 1.3 oster }
1352 1.3 oster
1353 1.3 oster /* link the unblock node to the term node */
1354 1.3 oster RF_ASSERT(unblockNode->numSuccedents == 1);
1355 1.3 oster RF_ASSERT(termNode->numAntecedents == 1);
1356 1.3 oster RF_ASSERT(termNode->numSuccedents == 0);
1357 1.3 oster unblockNode->succedents[0] = termNode;
1358 1.3 oster termNode->antecedents[0] = unblockNode;
1359 1.3 oster termNode->antType[0] = rf_control;
1360 1.1 oster }
1361