Home | History | Annotate | Line # | Download | only in raidframe
rf_dagffwr.c revision 1.23
      1 /*	$NetBSD: rf_dagffwr.c,v 1.23 2004/03/20 04:22:05 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * rf_dagffwr.c
     31  *
     32  * code for creating fault-free write DAGs
     33  *
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.23 2004/03/20 04:22:05 oster Exp $");
     38 
     39 #include <dev/raidframe/raidframevar.h>
     40 
     41 #include "rf_raid.h"
     42 #include "rf_dag.h"
     43 #include "rf_dagutils.h"
     44 #include "rf_dagfuncs.h"
     45 #include "rf_debugMem.h"
     46 #include "rf_dagffrd.h"
     47 #include "rf_general.h"
     48 #include "rf_dagffwr.h"
     49 #include "rf_map.h"
     50 
     51 /******************************************************************************
     52  *
     53  * General comments on DAG creation:
     54  *
     55  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
     56  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
     57  * is reached, the execution engine will halt forward execution and work
     58  * backward through the graph, executing the undo functions.  Assuming that
     59  * each node in the graph prior to the Cmt node is undoable and atomic - or -
     60  * does not make changes to permanent state, the graph will fail atomically.
     61  * If an error occurs after the Cmt node executes, the engine will roll-forward
     62  * through the graph, blindly executing nodes until it reaches the end.
     63  * If a graph reaches the end, it is assumed to have completed successfully.
     64  *
     65  * A graph has only 1 Cmt node.
     66  *
     67  */
     68 
     69 
     70 /******************************************************************************
     71  *
     72  * The following wrappers map the standard DAG creation interface to the
     73  * DAG creation routines.  Additionally, these wrappers enable experimentation
     74  * with new DAG structures by providing an extra level of indirection, allowing
     75  * the DAG creation routines to be replaced at this single point.
     76  */
     77 
     78 
     79 void
     80 rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
     81 			      RF_DagHeader_t *dag_h, void *bp,
     82 			      RF_RaidAccessFlags_t flags,
     83 			      RF_AllocListElem_t *allocList,
     84 			      RF_IoType_t type)
     85 {
     86 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
     87 				 RF_IO_TYPE_WRITE);
     88 }
     89 
     90 void
     91 rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
     92 		       RF_DagHeader_t *dag_h, void *bp,
     93 		       RF_RaidAccessFlags_t flags,
     94 		       RF_AllocListElem_t *allocList,
     95 		       RF_IoType_t type)
     96 {
     97 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
     98 				 RF_IO_TYPE_WRITE);
     99 }
    100 
    101 void
    102 rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    103 		       RF_DagHeader_t *dag_h, void *bp,
    104 		       RF_RaidAccessFlags_t flags,
    105 		       RF_AllocListElem_t *allocList)
    106 {
    107 	/* "normal" rollaway */
    108 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
    109 				     allocList, &rf_xorFuncs, NULL);
    110 }
    111 
    112 void
    113 rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    114 		       RF_DagHeader_t *dag_h, void *bp,
    115 		       RF_RaidAccessFlags_t flags,
    116 		       RF_AllocListElem_t *allocList)
    117 {
    118 	/* "normal" rollaway */
    119 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
    120 				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
    121 }
    122 
    123 
    124 /******************************************************************************
    125  *
    126  * DAG creation code begins here
    127  */
    128 
    129 
    130 /******************************************************************************
    131  *
    132  * creates a DAG to perform a large-write operation:
    133  *
    134  *           / Rod \           / Wnd \
    135  * H -- block- Rod - Xor - Cmt - Wnd --- T
    136  *           \ Rod /          \  Wnp /
    137  *                             \[Wnq]/
    138  *
    139  * The XOR node also does the Q calculation in the P+Q architecture.
    140  * All nodes before the commit node (Cmt) are assumed to be atomic and
    141  * undoable - or - they make no changes to permanent state.
    142  *
    143  * Rod = read old data
    144  * Cmt = commit node
    145  * Wnp = write new parity
    146  * Wnd = write new data
    147  * Wnq = write new "q"
    148  * [] denotes optional segments in the graph
    149  *
    150  * Parameters:  raidPtr   - description of the physical array
    151  *              asmap     - logical & physical addresses for this access
    152  *              bp        - buffer ptr (holds write data)
    153  *              flags     - general flags (e.g. disk locking)
    154  *              allocList - list of memory allocated in DAG creation
    155  *              nfaults   - number of faults array can tolerate
    156  *                          (equal to # redundancy units in stripe)
    157  *              redFunc   - redundancy generating function (run by the Xor node)
    158  *
    159  *****************************************************************************/
    160 
    161 void
    162 rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    163 			     RF_DagHeader_t *dag_h, void *bp,
    164 			     RF_RaidAccessFlags_t flags,
    165 			     RF_AllocListElem_t *allocList,
    166 			     int nfaults, int (*redFunc) (RF_DagNode_t *),
    167 			     int allowBufferRecycle)
    168 {
    169 	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
    170 	RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
    171 	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
    172 	RF_AccessStripeMapHeader_t *new_asm_h[2];
    173 	RF_StripeNum_t parityStripeID;
    174 	char   *sosBuffer, *eosBuffer;
    175 	RF_ReconUnitNum_t which_ru;
    176 	RF_RaidLayout_t *layoutPtr;
    177 	RF_PhysDiskAddr_t *pda;
    178 	RF_VoidPointerListElem_t *vple;
    179 
    180 	layoutPtr = &(raidPtr->Layout);
    181 	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
    182 							asmap->raidAddress,
    183 							&which_ru);
    184 
    185 #if RF_DEBUG_DAG
    186 	if (rf_dagDebug) {
    187 		printf("[Creating large-write DAG]\n");
    188 	}
    189 #endif
    190 	dag_h->creator = "LargeWriteDAG";
    191 
    192 	dag_h->numCommitNodes = 1;
    193 	dag_h->numCommits = 0;
    194 	dag_h->numSuccedents = 1;
    195 
    196 	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
    197 	nWndNodes = asmap->numStripeUnitsAccessed;
    198 
    199 	for (i = 0; i < nWndNodes; i++) {
    200 		tmpNode = rf_AllocDAGNode();
    201 		tmpNode->list_next = dag_h->nodes;
    202 		dag_h->nodes = tmpNode;
    203 	}
    204 	wndNodes = dag_h->nodes;
    205 
    206 	xorNode = rf_AllocDAGNode();
    207 	xorNode->list_next = dag_h->nodes;
    208 	dag_h->nodes = xorNode;
    209 
    210 	wnpNode = rf_AllocDAGNode();
    211 	wnpNode->list_next = dag_h->nodes;
    212 	dag_h->nodes = wnpNode;
    213 
    214 	blockNode = rf_AllocDAGNode();
    215 	blockNode->list_next = dag_h->nodes;
    216 	dag_h->nodes = blockNode;
    217 
    218 	commitNode = rf_AllocDAGNode();
    219 	commitNode->list_next = dag_h->nodes;
    220 	dag_h->nodes = commitNode;
    221 
    222 	termNode = rf_AllocDAGNode();
    223 	termNode->list_next = dag_h->nodes;
    224 	dag_h->nodes = termNode;
    225 
    226 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    227 	if (nfaults == 2) {
    228 		wnqNode = rf_AllocDAGNode();
    229 	} else {
    230 #endif
    231 		wnqNode = NULL;
    232 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    233 	}
    234 #endif
    235 	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
    236 					new_asm_h, &nRodNodes, &sosBuffer,
    237 					&eosBuffer, allocList);
    238 	if (nRodNodes > 0) {
    239 		for (i = 0; i < nRodNodes; i++) {
    240 			tmpNode = rf_AllocDAGNode();
    241 			tmpNode->list_next = dag_h->nodes;
    242 			dag_h->nodes = tmpNode;
    243 		}
    244 		rodNodes = dag_h->nodes;
    245 	} else {
    246 		rodNodes = NULL;
    247 	}
    248 
    249 	/* begin node initialization */
    250 	if (nRodNodes > 0) {
    251 		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    252 			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
    253 			    dag_h, "Nil", allocList);
    254 	} else {
    255 		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    256 			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
    257 			    dag_h, "Nil", allocList);
    258 	}
    259 
    260 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
    261 		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
    262 		    dag_h, "Cmt", allocList);
    263 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
    264 		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
    265 		    dag_h, "Trm", allocList);
    266 
    267 	/* initialize the Rod nodes */
    268 	tmpNode = rodNodes;
    269 	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
    270 		if (new_asm_h[asmNum]) {
    271 			pda = new_asm_h[asmNum]->stripeMap->physInfo;
    272 			while (pda) {
    273 				rf_InitNode(tmpNode, rf_wait,
    274 					    RF_FALSE, rf_DiskReadFunc,
    275 					    rf_DiskReadUndoFunc,
    276 					    rf_GenericWakeupFunc,
    277 					    1, 1, 4, 0, dag_h,
    278 					    "Rod", allocList);
    279 				tmpNode->params[0].p = pda;
    280 				tmpNode->params[1].p = pda->bufPtr;
    281 				tmpNode->params[2].v = parityStripeID;
    282 				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    283 				    which_ru);
    284 				nodeNum++;
    285 				pda = pda->next;
    286 				tmpNode = tmpNode->list_next;
    287 			}
    288 		}
    289 	}
    290 	RF_ASSERT(nodeNum == nRodNodes);
    291 
    292 	/* initialize the wnd nodes */
    293 	pda = asmap->physInfo;
    294 	tmpNode = wndNodes;
    295 	for (i = 0; i < nWndNodes; i++) {
    296 		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
    297 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    298 			    rf_GenericWakeupFunc, 1, 1, 4, 0,
    299 			    dag_h, "Wnd", allocList);
    300 		RF_ASSERT(pda != NULL);
    301 		tmpNode->params[0].p = pda;
    302 		tmpNode->params[1].p = pda->bufPtr;
    303 		tmpNode->params[2].v = parityStripeID;
    304 		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    305 		pda = pda->next;
    306 		tmpNode = tmpNode->list_next;
    307 	}
    308 
    309 	/* initialize the redundancy node */
    310 	if (nRodNodes > 0) {
    311 		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
    312 			    rf_NullNodeUndoFunc, NULL, 1,
    313 			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
    314 			    nfaults, dag_h, "Xr ", allocList);
    315 	} else {
    316 		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
    317 			    rf_NullNodeUndoFunc, NULL, 1,
    318 			    1, 2 * (nWndNodes + nRodNodes) + 1,
    319 			    nfaults, dag_h, "Xr ", allocList);
    320 	}
    321 	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
    322 	tmpNode = wndNodes;
    323 	for (i = 0; i < nWndNodes; i++) {
    324 		/* pda */
    325 		xorNode->params[2 * i + 0] = tmpNode->params[0];
    326 		/* buf ptr */
    327 		xorNode->params[2 * i + 1] = tmpNode->params[1];
    328 		tmpNode = tmpNode->list_next;
    329 	}
    330 	tmpNode = rodNodes;
    331 	for (i = 0; i < nRodNodes; i++) {
    332 		/* pda */
    333 		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
    334 		/* buf ptr */
    335 		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
    336 		tmpNode = tmpNode->list_next;
    337 	}
    338 	/* xor node needs to get at RAID information */
    339 	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
    340 
    341 	/*
    342          * Look for an Rod node that reads a complete SU. If none,
    343          * alloc a buffer to receive the parity info. Note that we
    344          * can't use a new data buffer because it will not have gotten
    345          * written when the xor occurs.  */
    346 	if (allowBufferRecycle) {
    347 		tmpNode = rodNodes;
    348 		for (i = 0; i < nRodNodes; i++) {
    349 			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
    350 				break;
    351 			tmpNode = tmpNode->list_next;
    352 		}
    353 	}
    354 	if ((!allowBufferRecycle) || (i == nRodNodes)) {
    355 		xorNode->results[0] = rf_AllocIOBuffer(raidPtr,
    356 						       rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
    357 		vple = rf_AllocVPListElem();
    358 		vple->p = xorNode->results[0];
    359 		vple->next = dag_h->iobufs;
    360 		dag_h->iobufs = vple;
    361 	} else {
    362 		/* this works because the only way we get here is if
    363 		   allowBufferRecycle is true and we went through the
    364 		   above for loop, and exited via the break before
    365 		   i==nRodNodes was true.  That means tmpNode will
    366 		   still point to a valid node -- the one we want for
    367 		   here! */
    368 		xorNode->results[0] = tmpNode->params[1].p;
    369 	}
    370 
    371 	/* initialize the Wnp node */
    372 	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
    373 		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
    374 		    dag_h, "Wnp", allocList);
    375 	wnpNode->params[0].p = asmap->parityInfo;
    376 	wnpNode->params[1].p = xorNode->results[0];
    377 	wnpNode->params[2].v = parityStripeID;
    378 	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    379 	/* parityInfo must describe entire parity unit */
    380 	RF_ASSERT(asmap->parityInfo->next == NULL);
    381 
    382 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    383 	if (nfaults == 2) {
    384 		/*
    385 	         * We never try to recycle a buffer for the Q calcuation
    386 	         * in addition to the parity. This would cause two buffers
    387 	         * to get smashed during the P and Q calculation, guaranteeing
    388 	         * one would be wrong.
    389 	         */
    390 		RF_MallocAndAdd(xorNode->results[1],
    391 				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
    392 				(void *), allocList);
    393 		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
    394 			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
    395 			    1, 1, 4, 0, dag_h, "Wnq", allocList);
    396 		wnqNode->params[0].p = asmap->qInfo;
    397 		wnqNode->params[1].p = xorNode->results[1];
    398 		wnqNode->params[2].v = parityStripeID;
    399 		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    400 		/* parityInfo must describe entire parity unit */
    401 		RF_ASSERT(asmap->parityInfo->next == NULL);
    402 	}
    403 #endif
    404 	/*
    405          * Connect nodes to form graph.
    406          */
    407 
    408 	/* connect dag header to block node */
    409 	RF_ASSERT(blockNode->numAntecedents == 0);
    410 	dag_h->succedents[0] = blockNode;
    411 
    412 	if (nRodNodes > 0) {
    413 		/* connect the block node to the Rod nodes */
    414 		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
    415 		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
    416 		tmpNode = rodNodes;
    417 		for (i = 0; i < nRodNodes; i++) {
    418 			RF_ASSERT(tmpNode.numAntecedents == 1);
    419 			blockNode->succedents[i] = tmpNode;
    420 			tmpNode->antecedents[0] = blockNode;
    421 			tmpNode->antType[0] = rf_control;
    422 
    423 			/* connect the Rod nodes to the Xor node */
    424 			RF_ASSERT(tmpNode.numSuccedents == 1);
    425 			tmpNode->succedents[0] = xorNode;
    426 			xorNode->antecedents[i] = tmpNode;
    427 			xorNode->antType[i] = rf_trueData;
    428 			tmpNode = tmpNode->list_next;
    429 		}
    430 	} else {
    431 		/* connect the block node to the Xor node */
    432 		RF_ASSERT(blockNode->numSuccedents == 1);
    433 		RF_ASSERT(xorNode->numAntecedents == 1);
    434 		blockNode->succedents[0] = xorNode;
    435 		xorNode->antecedents[0] = blockNode;
    436 		xorNode->antType[0] = rf_control;
    437 	}
    438 
    439 	/* connect the xor node to the commit node */
    440 	RF_ASSERT(xorNode->numSuccedents == 1);
    441 	RF_ASSERT(commitNode->numAntecedents == 1);
    442 	xorNode->succedents[0] = commitNode;
    443 	commitNode->antecedents[0] = xorNode;
    444 	commitNode->antType[0] = rf_control;
    445 
    446 	/* connect the commit node to the write nodes */
    447 	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
    448 	tmpNode = wndNodes;
    449 	for (i = 0; i < nWndNodes; i++) {
    450 		RF_ASSERT(wndNodes->numAntecedents == 1);
    451 		commitNode->succedents[i] = tmpNode;
    452 		tmpNode->antecedents[0] = commitNode;
    453 		tmpNode->antType[0] = rf_control;
    454 		tmpNode = tmpNode->list_next;
    455 	}
    456 	RF_ASSERT(wnpNode->numAntecedents == 1);
    457 	commitNode->succedents[nWndNodes] = wnpNode;
    458 	wnpNode->antecedents[0] = commitNode;
    459 	wnpNode->antType[0] = rf_trueData;
    460 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    461 	if (nfaults == 2) {
    462 		RF_ASSERT(wnqNode->numAntecedents == 1);
    463 		commitNode->succedents[nWndNodes + 1] = wnqNode;
    464 		wnqNode->antecedents[0] = commitNode;
    465 		wnqNode->antType[0] = rf_trueData;
    466 	}
    467 #endif
    468 	/* connect the write nodes to the term node */
    469 	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
    470 	RF_ASSERT(termNode->numSuccedents == 0);
    471 	tmpNode = wndNodes;
    472 	for (i = 0; i < nWndNodes; i++) {
    473 		RF_ASSERT(wndNodes->numSuccedents == 1);
    474 		tmpNode->succedents[0] = termNode;
    475 		termNode->antecedents[i] = tmpNode;
    476 		termNode->antType[i] = rf_control;
    477 		tmpNode = tmpNode->list_next;
    478 	}
    479 	RF_ASSERT(wnpNode->numSuccedents == 1);
    480 	wnpNode->succedents[0] = termNode;
    481 	termNode->antecedents[nWndNodes] = wnpNode;
    482 	termNode->antType[nWndNodes] = rf_control;
    483 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    484 	if (nfaults == 2) {
    485 		RF_ASSERT(wnqNode->numSuccedents == 1);
    486 		wnqNode->succedents[0] = termNode;
    487 		termNode->antecedents[nWndNodes + 1] = wnqNode;
    488 		termNode->antType[nWndNodes + 1] = rf_control;
    489 	}
    490 #endif
    491 }
    492 /******************************************************************************
    493  *
    494  * creates a DAG to perform a small-write operation (either raid 5 or pq),
    495  * which is as follows:
    496  *
    497  * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
    498  *            \- Rod X      /     \----> Wnd [Und]-/
    499  *           [\- Rod X     /       \---> Wnd [Und]-/]
    500  *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
    501  *
    502  * Rop = read old parity
    503  * Rod = read old data
    504  * Roq = read old "q"
    505  * Cmt = commit node
    506  * Und = unlock data disk
    507  * Unp = unlock parity disk
    508  * Unq = unlock q disk
    509  * Wnp = write new parity
    510  * Wnd = write new data
    511  * Wnq = write new "q"
    512  * [ ] denotes optional segments in the graph
    513  *
    514  * Parameters:  raidPtr   - description of the physical array
    515  *              asmap     - logical & physical addresses for this access
    516  *              bp        - buffer ptr (holds write data)
    517  *              flags     - general flags (e.g. disk locking)
    518  *              allocList - list of memory allocated in DAG creation
    519  *              pfuncs    - list of parity generating functions
    520  *              qfuncs    - list of q generating functions
    521  *
    522  * A null qfuncs indicates single fault tolerant
    523  *****************************************************************************/
    524 
    525 void
    526 rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    527 			     RF_DagHeader_t *dag_h, void *bp,
    528 			     RF_RaidAccessFlags_t flags,
    529 			     RF_AllocListElem_t *allocList,
    530 			     const RF_RedFuncs_t *pfuncs,
    531 			     const RF_RedFuncs_t *qfuncs)
    532 {
    533 	RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
    534 	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
    535 	RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
    536 	RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
    537 	RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
    538 	RF_DagNode_t *tmpwriteParityNode;
    539 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    540 	RF_DagNode_t *tmpwriteQNode;
    541 #endif
    542 	int     i, j, nNodes, totalNumNodes;
    543 	RF_ReconUnitNum_t which_ru;
    544 	int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
    545 	int     (*qfunc) (RF_DagNode_t *);
    546 	int     numDataNodes, numParityNodes;
    547 	RF_StripeNum_t parityStripeID;
    548 	RF_PhysDiskAddr_t *pda;
    549 	char   *name, *qname;
    550 	long    nfaults;
    551 
    552 	nfaults = qfuncs ? 2 : 1;
    553 
    554 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
    555 	    asmap->raidAddress, &which_ru);
    556 	pda = asmap->physInfo;
    557 	numDataNodes = asmap->numStripeUnitsAccessed;
    558 	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
    559 
    560 #if RF_DEBUG_DAG
    561 	if (rf_dagDebug) {
    562 		printf("[Creating small-write DAG]\n");
    563 	}
    564 #endif
    565 	RF_ASSERT(numDataNodes > 0);
    566 	dag_h->creator = "SmallWriteDAG";
    567 
    568 	dag_h->numCommitNodes = 1;
    569 	dag_h->numCommits = 0;
    570 	dag_h->numSuccedents = 1;
    571 
    572 	/*
    573          * DAG creation occurs in four steps:
    574          * 1. count the number of nodes in the DAG
    575          * 2. create the nodes
    576          * 3. initialize the nodes
    577          * 4. connect the nodes
    578          */
    579 
    580 	/*
    581          * Step 1. compute number of nodes in the graph
    582          */
    583 
    584 	/* number of nodes: a read and write for each data unit a
    585 	 * redundancy computation node for each parity node (nfaults *
    586 	 * nparity) a read and write for each parity unit a block and
    587 	 * commit node (2) a terminate node if atomic RMW an unlock
    588 	 * node for each data unit, redundancy unit */
    589 	totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
    590 	    + (nfaults * 2 * numParityNodes) + 3;
    591 	/*
    592          * Step 2. create the nodes
    593          */
    594 
    595 	blockNode = rf_AllocDAGNode();
    596 	blockNode->list_next = dag_h->nodes;
    597 	dag_h->nodes = blockNode;
    598 
    599 	commitNode = rf_AllocDAGNode();
    600 	commitNode->list_next = dag_h->nodes;
    601 	dag_h->nodes = commitNode;
    602 
    603 	for (i = 0; i < numDataNodes; i++) {
    604 		tmpNode = rf_AllocDAGNode();
    605 		tmpNode->list_next = dag_h->nodes;
    606 		dag_h->nodes = tmpNode;
    607 	}
    608 	readDataNodes = dag_h->nodes;
    609 
    610 	for (i = 0; i < numParityNodes; i++) {
    611 		tmpNode = rf_AllocDAGNode();
    612 		tmpNode->list_next = dag_h->nodes;
    613 		dag_h->nodes = tmpNode;
    614 	}
    615 	readParityNodes = dag_h->nodes;
    616 
    617 	for (i = 0; i < numDataNodes; i++) {
    618 		tmpNode = rf_AllocDAGNode();
    619 		tmpNode->list_next = dag_h->nodes;
    620 		dag_h->nodes = tmpNode;
    621 	}
    622 	writeDataNodes = dag_h->nodes;
    623 
    624 	for (i = 0; i < numParityNodes; i++) {
    625 		tmpNode = rf_AllocDAGNode();
    626 		tmpNode->list_next = dag_h->nodes;
    627 		dag_h->nodes = tmpNode;
    628 	}
    629 	writeParityNodes = dag_h->nodes;
    630 
    631 	for (i = 0; i < numParityNodes; i++) {
    632 		tmpNode = rf_AllocDAGNode();
    633 		tmpNode->list_next = dag_h->nodes;
    634 		dag_h->nodes = tmpNode;
    635 	}
    636 	xorNodes = dag_h->nodes;
    637 
    638 	termNode = rf_AllocDAGNode();
    639 	termNode->list_next = dag_h->nodes;
    640 	dag_h->nodes = termNode;
    641 
    642 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    643 	if (nfaults == 2) {
    644 		for (i = 0; i < numParityNodes; i++) {
    645 			tmpNode = rf_AllocDAGNode();
    646 			tmpNode->list_next = dag_h->nodes;
    647 			dag_h->nodes = tmpNode;
    648 		}
    649 		readQNodes = dag_h->nodes;
    650 
    651 		for (i = 0; i < numParityNodes; i++) {
    652 			tmpNode = rf_AllocDAGNode();
    653 			tmpNode->list_next = dag_h->nodes;
    654 			dag_h->nodes = tmpNode;
    655 		}
    656 		writeQNodes = dag_h->nodes;
    657 
    658 		for (i = 0; i < numParityNodes; i++) {
    659 			tmpNode = rf_AllocDAGNode();
    660 			tmpNode->list_next = dag_h->nodes;
    661 			dag_h->nodes = tmpNode;
    662 		}
    663 		qNodes = dag_h->nodes;
    664 	} else {
    665 #endif
    666 		readQNodes = writeQNodes = qNodes = NULL;
    667 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    668 	}
    669 #endif
    670 	RF_ASSERT(i == totalNumNodes);
    671 
    672 	/*
    673          * Step 3. initialize the nodes
    674          */
    675 	/* initialize block node (Nil) */
    676 	nNodes = numDataNodes + (nfaults * numParityNodes);
    677 	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    678 		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
    679 		    dag_h, "Nil", allocList);
    680 
    681 	/* initialize commit node (Cmt) */
    682 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
    683 		    rf_NullNodeUndoFunc, NULL, nNodes,
    684 		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
    685 
    686 	/* initialize terminate node (Trm) */
    687 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
    688 		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
    689 		    dag_h, "Trm", allocList);
    690 
    691 	/* initialize nodes which read old data (Rod) */
    692 	tmpreadDataNode = readDataNodes;
    693 	for (i = 0; i < numDataNodes; i++) {
    694 		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
    695 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    696 			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
    697 			    1, 4, 0, dag_h, "Rod", allocList);
    698 		RF_ASSERT(pda != NULL);
    699 		/* physical disk addr desc */
    700 		tmpreadDataNode->params[0].p = pda;
    701 		/* buffer to hold old data */
    702 		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
    703 		tmpreadDataNode->params[2].v = parityStripeID;
    704 		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    705 		    which_ru);
    706 		pda = pda->next;
    707 		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
    708 			tmpreadDataNode->propList[j] = NULL;
    709 		}
    710 		tmpreadDataNode = tmpreadDataNode->list_next;
    711 	}
    712 
    713 	/* initialize nodes which read old parity (Rop) */
    714 	pda = asmap->parityInfo;
    715 	i = 0;
    716 	tmpreadParityNode = readParityNodes;
    717 	for (i = 0; i < numParityNodes; i++) {
    718 		RF_ASSERT(pda != NULL);
    719 		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
    720 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    721 			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
    722 			    dag_h, "Rop", allocList);
    723 		tmpreadParityNode->params[0].p = pda;
    724 		/* buffer to hold old parity */
    725 		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
    726 		tmpreadParityNode->params[2].v = parityStripeID;
    727 		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    728 		    which_ru);
    729 		pda = pda->next;
    730 		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
    731 			tmpreadParityNode->propList[0] = NULL;
    732 		}
    733 		tmpreadParityNode = tmpreadParityNode->list_next;
    734 	}
    735 
    736 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    737 	/* initialize nodes which read old Q (Roq) */
    738 	if (nfaults == 2) {
    739 		pda = asmap->qInfo;
    740 		tmpreadQNode = readQNodes;
    741 		for (i = 0; i < numParityNodes; i++) {
    742 			RF_ASSERT(pda != NULL);
    743 			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
    744 				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    745 				    rf_GenericWakeupFunc, numParityNodes,
    746 				    1, 4, 0, dag_h, "Roq", allocList);
    747 			tmpreadQNode->params[0].p = pda;
    748 			/* buffer to hold old Q */
    749 			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, pda, allocList);
    750 			tmpreadQNode->params[2].v = parityStripeID;
    751 			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    752 			    which_ru);
    753 			pda = pda->next;
    754 			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
    755 				tmpreadQNode->propList[0] = NULL;
    756 			}
    757 			tmpreadQNode = tmpreadQNode->list_next;
    758 		}
    759 	}
    760 #endif
    761 	/* initialize nodes which write new data (Wnd) */
    762 	pda = asmap->physInfo;
    763 	tmpwriteDataNode = writeDataNodes;
    764 	for (i = 0; i < numDataNodes; i++) {
    765 		RF_ASSERT(pda != NULL);
    766 		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
    767 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    768 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    769 			    "Wnd", allocList);
    770 		/* physical disk addr desc */
    771 		tmpwriteDataNode->params[0].p = pda;
    772 		/* buffer holding new data to be written */
    773 		tmpwriteDataNode->params[1].p = pda->bufPtr;
    774 		tmpwriteDataNode->params[2].v = parityStripeID;
    775 		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    776 		    which_ru);
    777 		pda = pda->next;
    778 		tmpwriteDataNode = tmpwriteDataNode->list_next;
    779 	}
    780 
    781 	/*
    782          * Initialize nodes which compute new parity and Q.
    783          */
    784 	/*
    785          * We use the simple XOR func in the double-XOR case, and when
    786          * we're accessing only a portion of one stripe unit. The
    787          * distinction between the two is that the regular XOR func
    788          * assumes that the targbuf is a full SU in size, and examines
    789          * the pda associated with the buffer to decide where within
    790          * the buffer to XOR the data, whereas the simple XOR func
    791          * just XORs the data into the start of the buffer.  */
    792 	if ((numParityNodes == 2) || ((numDataNodes == 1)
    793 		&& (asmap->totalSectorsAccessed <
    794 		    raidPtr->Layout.sectorsPerStripeUnit))) {
    795 		func = pfuncs->simple;
    796 		undoFunc = rf_NullNodeUndoFunc;
    797 		name = pfuncs->SimpleName;
    798 		if (qfuncs) {
    799 			qfunc = qfuncs->simple;
    800 			qname = qfuncs->SimpleName;
    801 		} else {
    802 			qfunc = NULL;
    803 			qname = NULL;
    804 		}
    805 	} else {
    806 		func = pfuncs->regular;
    807 		undoFunc = rf_NullNodeUndoFunc;
    808 		name = pfuncs->RegularName;
    809 		if (qfuncs) {
    810 			qfunc = qfuncs->regular;
    811 			qname = qfuncs->RegularName;
    812 		} else {
    813 			qfunc = NULL;
    814 			qname = NULL;
    815 		}
    816 	}
    817 	/*
    818          * Initialize the xor nodes: params are {pda,buf}
    819          * from {Rod,Wnd,Rop} nodes, and raidPtr
    820          */
    821 	if (numParityNodes == 2) {
    822 		/* double-xor case */
    823 		tmpxorNode = xorNodes;
    824 		tmpreadDataNode = readDataNodes;
    825 		tmpreadParityNode = readParityNodes;
    826 		tmpwriteDataNode = writeDataNodes;
    827 		tmpqNode = qNodes;
    828 		tmpreadQNode = readQNodes;
    829 		for (i = 0; i < numParityNodes; i++) {
    830 			/* note: no wakeup func for xor */
    831 			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
    832 				    undoFunc, NULL, 1,
    833 				    (numDataNodes + numParityNodes),
    834 				    7, 1, dag_h, name, allocList);
    835 			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
    836 			tmpxorNode->params[0] = tmpreadDataNode->params[0];
    837 			tmpxorNode->params[1] = tmpreadDataNode->params[1];
    838 			tmpxorNode->params[2] = tmpreadParityNode->params[0];
    839 			tmpxorNode->params[3] = tmpreadParityNode->params[1];
    840 			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
    841 			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
    842 			tmpxorNode->params[6].p = raidPtr;
    843 			/* use old parity buf as target buf */
    844 			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
    845 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    846 			if (nfaults == 2) {
    847 				/* note: no wakeup func for qor */
    848 				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
    849 					    qfunc, undoFunc, NULL, 1,
    850 					    (numDataNodes + numParityNodes),
    851 					    7, 1, dag_h, qname, allocList);
    852 				tmpqNode->params[0] = tmpreadDataNode->params[0];
    853 				tmpqNode->params[1] = tmpreadDataNode->params[1];
    854 				tmpqNode->params[2] = tmpreadQNode->.params[0];
    855 				tmpqNode->params[3] = tmpreadQNode->params[1];
    856 				tmpqNode->params[4] = tmpwriteDataNode->params[0];
    857 				tmpqNode->params[5] = tmpwriteDataNode->params[1];
    858 				tmpqNode->params[6].p = raidPtr;
    859 				/* use old Q buf as target buf */
    860 				tmpqNode->results[0] = tmpreadQNode->params[1].p;
    861 				tmpqNode = tmpqNode->list_next;
    862 				tmpreadQNodes = tmpreadQNodes->list_next;
    863 			}
    864 #endif
    865 			tmpxorNode = tmpxorNode->list_next;
    866 			tmpreadDataNode = tmpreadDataNode->list_next;
    867 			tmpreadParityNode = tmpreadParityNode->list_next;
    868 			tmpwriteDataNode = tmpwriteDataNode->list_next;
    869 		}
    870 	} else {
    871 		/* there is only one xor node in this case */
    872 		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
    873 			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
    874 			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
    875 			    dag_h, name, allocList);
    876 		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
    877 		tmpreadDataNode = readDataNodes;
    878 		for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
    879 							out the "+1" into the "deal with Rop separately below */
    880 			/* set up params related to Rod nodes */
    881 			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
    882 			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
    883 			tmpreadDataNode = tmpreadDataNode->list_next;
    884 		}
    885 		/* deal with Rop separately */
    886 		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
    887 		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
    888 
    889 		tmpwriteDataNode = writeDataNodes;
    890 		for (i = 0; i < numDataNodes; i++) {
    891 			/* set up params related to Wnd and Wnp nodes */
    892 			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
    893 			    tmpwriteDataNode->params[0];
    894 			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
    895 			    tmpwriteDataNode->params[1];
    896 			tmpwriteDataNode = tmpwriteDataNode->list_next;
    897 		}
    898 		/* xor node needs to get at RAID information */
    899 		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
    900 		xorNodes->results[0] = readParityNodes->params[1].p;
    901 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    902 		if (nfaults == 2) {
    903 			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
    904 				    undoFunc, NULL, 1,
    905 				    (numDataNodes + numParityNodes),
    906 				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
    907 				    dag_h, qname, allocList);
    908 			tmpreadDataNode = readDataNodes;
    909 			for (i = 0; i < numDataNodes; i++) {
    910 				/* set up params related to Rod */
    911 				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
    912 				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
    913 				tmpreadDataNode = tmpreadDataNode->list_next;
    914 			}
    915 			/* and read old q */
    916 			qNodes->params[2 * numDataNodes + 0] =	/* pda */
    917 			    readQNodes->params[0];
    918 			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
    919 			    readQNodes->params[1];
    920 			tmpwriteDataNode = writeDataNodes;
    921 			for (i = 0; i < numDataNodes; i++) {
    922 				/* set up params related to Wnd nodes */
    923 				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
    924 				    tmpwriteDataNode->params[0];
    925 				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
    926 				    tmpwriteDataNode->params[1];
    927 				tmpwriteDataNode = tmpwriteDataNode->list_next;
    928 			}
    929 			/* xor node needs to get at RAID information */
    930 			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
    931 			qNodes->results[0] = readQNodes->params[1].p;
    932 		}
    933 #endif
    934 	}
    935 
    936 	/* initialize nodes which write new parity (Wnp) */
    937 	pda = asmap->parityInfo;
    938 	tmpwriteParityNode = writeParityNodes;
    939 	tmpxorNode = xorNodes;
    940 	for (i = 0; i < numParityNodes; i++) {
    941 		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
    942 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    943 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    944 			    "Wnp", allocList);
    945 		RF_ASSERT(pda != NULL);
    946 		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
    947 				  			 * filled in by xor node */
    948 		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
    949 				  						 * parity write
    950 				  						 * operation */
    951 		tmpwriteParityNode->params[2].v = parityStripeID;
    952 		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    953 		    which_ru);
    954 		pda = pda->next;
    955 		tmpwriteParityNode = tmpwriteParityNode->list_next;
    956 		tmpxorNode = tmpxorNode->list_next;
    957 	}
    958 
    959 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    960 	/* initialize nodes which write new Q (Wnq) */
    961 	if (nfaults == 2) {
    962 		pda = asmap->qInfo;
    963 		tmpwriteQNode = writeQNodes;
    964 		tmpqNode = qNodes;
    965 		for (i = 0; i < numParityNodes; i++) {
    966 			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
    967 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    968 				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    969 				    "Wnq", allocList);
    970 			RF_ASSERT(pda != NULL);
    971 			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
    972 								 * filled in by xor node */
    973 			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
    974 										 * parity write
    975 										 * operation */
    976 			tmpwriteQNode->params[2].v = parityStripeID;
    977 			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    978 			    which_ru);
    979 			pda = pda->next;
    980 			tmpwriteQNode = tmpwriteQNode->list_next;
    981 			tmpqNode = tmpqNode->list_next;
    982 		}
    983 	}
    984 #endif
    985 	/*
    986          * Step 4. connect the nodes.
    987          */
    988 
    989 	/* connect header to block node */
    990 	dag_h->succedents[0] = blockNode;
    991 
    992 	/* connect block node to read old data nodes */
    993 	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
    994 	tmpreadDataNode = readDataNodes;
    995 	for (i = 0; i < numDataNodes; i++) {
    996 		blockNode->succedents[i] = tmpreadDataNode;
    997 		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
    998 		tmpreadDataNode->antecedents[0] = blockNode;
    999 		tmpreadDataNode->antType[0] = rf_control;
   1000 		tmpreadDataNode = tmpreadDataNode->list_next;
   1001 	}
   1002 
   1003 	/* connect block node to read old parity nodes */
   1004 	tmpreadParityNode = readParityNodes;
   1005 	for (i = 0; i < numParityNodes; i++) {
   1006 		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
   1007 		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
   1008 		tmpreadParityNode->antecedents[0] = blockNode;
   1009 		tmpreadParityNode->antType[0] = rf_control;
   1010 		tmpreadParityNode = tmpreadParityNode->list_next;
   1011 	}
   1012 
   1013 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1014 	/* connect block node to read old Q nodes */
   1015 	if (nfaults == 2) {
   1016 		tmpreadQNode = readQNodes;
   1017 		for (i = 0; i < numParityNodes; i++) {
   1018 			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
   1019 			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
   1020 			tmpreadQNode->antecedents[0] = blockNode;
   1021 			tmpreadQNode->antType[0] = rf_control;
   1022 			tmpreadQNode = tmpreadQNode->list_next;
   1023 		}
   1024 	}
   1025 #endif
   1026 	/* connect read old data nodes to xor nodes */
   1027 	tmpreadDataNode = readDataNodes;
   1028 	for (i = 0; i < numDataNodes; i++) {
   1029 		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
   1030 		tmpxorNode = xorNodes;
   1031 		for (j = 0; j < numParityNodes; j++) {
   1032 			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
   1033 			tmpreadDataNode->succedents[j] = tmpxorNode;
   1034 			tmpxorNode->antecedents[i] = tmpreadDataNode;
   1035 			tmpxorNode->antType[i] = rf_trueData;
   1036 			tmpxorNode = tmpxorNode->list_next;
   1037 		}
   1038 		tmpreadDataNode = tmpreadDataNode->list_next;
   1039 	}
   1040 
   1041 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1042 	/* connect read old data nodes to q nodes */
   1043 	if (nfaults == 2) {
   1044 		tmpreadDataNode = readDataNodes;
   1045 		for (i = 0; i < numDataNodes; i++) {
   1046 			tmpqNode = qNodes;
   1047 			for (j = 0; j < numParityNodes; j++) {
   1048 				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
   1049 				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
   1050 				tmpqNode->antecedents[i] = tmpreadDataNode;
   1051 				tmpqNode->antType[i] = rf_trueData;
   1052 				tmpqNode = tmpqNode->list_next;
   1053 			}
   1054 			tmpreadDataNode = tmpreadDataNode->list_next;
   1055 		}
   1056 	}
   1057 #endif
   1058 	/* connect read old parity nodes to xor nodes */
   1059 	tmpreadParityNode = readParityNodes;
   1060 	for (i = 0; i < numParityNodes; i++) {
   1061 		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
   1062 		tmpxorNode = xorNodes;
   1063 		for (j = 0; j < numParityNodes; j++) {
   1064 			tmpreadParityNode->succedents[j] = tmpxorNode;
   1065 			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
   1066 			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
   1067 			tmpxorNode = tmpxorNode->list_next;
   1068 		}
   1069 		tmpreadParityNode = tmpreadParityNode->list_next;
   1070 	}
   1071 
   1072 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1073 	/* connect read old q nodes to q nodes */
   1074 	if (nfaults == 2) {
   1075 		tmpreadParityNode = readParityNodes;
   1076 		tmpreadQNode = readQNodes;
   1077 		for (i = 0; i < numParityNodes; i++) {
   1078 			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
   1079 			tmpqNode = qNodes;
   1080 			for (j = 0; j < numParityNodes; j++) {
   1081 				tmpreadQNode->succedents[j] = tmpqNode;
   1082 				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
   1083 				tmpqNode->antType[numDataNodes + i] = rf_trueData;
   1084 				tmpqNode = tmpqNode->list_next;
   1085 			}
   1086 			tmpreadParityNode = tmpreadParityNode->list_next;
   1087 			tmpreadQNode = tmpreadQNode->list_next;
   1088 		}
   1089 	}
   1090 #endif
   1091 	/* connect xor nodes to commit node */
   1092 	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
   1093 	tmpxorNode = xorNodes;
   1094 	for (i = 0; i < numParityNodes; i++) {
   1095 		RF_ASSERT(tmpxorNode->numSuccedents == 1);
   1096 		tmpxorNode->succedents[0] = commitNode;
   1097 		commitNode->antecedents[i] = tmpxorNode;
   1098 		commitNode->antType[i] = rf_control;
   1099 		tmpxorNode = tmpxorNode->list_next;
   1100 	}
   1101 
   1102 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1103 	/* connect q nodes to commit node */
   1104 	if (nfaults == 2) {
   1105 		tmpqNode = qNodes;
   1106 		for (i = 0; i < numParityNodes; i++) {
   1107 			RF_ASSERT(tmpqNode->numSuccedents == 1);
   1108 			tmpqNode->succedents[0] = commitNode;
   1109 			commitNode->antecedents[i + numParityNodes] = tmpqNode;
   1110 			commitNode->antType[i + numParityNodes] = rf_control;
   1111 			tmpqNode = tmpqNode->list_next;
   1112 		}
   1113 	}
   1114 #endif
   1115 	/* connect commit node to write nodes */
   1116 	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
   1117 	tmpwriteDataNode = writeDataNodes;
   1118 	for (i = 0; i < numDataNodes; i++) {
   1119 		RF_ASSERT(tmpwriteDataNodes->numAntecedents == 1);
   1120 		commitNode->succedents[i] = tmpwriteDataNode;
   1121 		tmpwriteDataNode->antecedents[0] = commitNode;
   1122 		tmpwriteDataNode->antType[0] = rf_trueData;
   1123 		tmpwriteDataNode = tmpwriteDataNode->list_next;
   1124 	}
   1125 	tmpwriteParityNode = writeParityNodes;
   1126 	for (i = 0; i < numParityNodes; i++) {
   1127 		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
   1128 		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
   1129 		tmpwriteParityNode->antecedents[0] = commitNode;
   1130 		tmpwriteParityNode->antType[0] = rf_trueData;
   1131 		tmpwriteParityNode = tmpwriteParityNode->list_next;
   1132 	}
   1133 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1134 	if (nfaults == 2) {
   1135 		tmpwriteQNode = writeQNodes;
   1136 		for (i = 0; i < numParityNodes; i++) {
   1137 			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
   1138 			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
   1139 			tmpwriteQNode->antecedents[0] = commitNode;
   1140 			tmpwriteQNode->antType[0] = rf_trueData;
   1141 			tmpwriteQNode = tmpwriteQNode->list_next;
   1142 		}
   1143 	}
   1144 #endif
   1145 	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
   1146 	RF_ASSERT(termNode->numSuccedents == 0);
   1147 	tmpwriteDataNode = writeDataNodes;
   1148 	for (i = 0; i < numDataNodes; i++) {
   1149 		/* connect write new data nodes to term node */
   1150 		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
   1151 		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
   1152 		tmpwriteDataNode->succedents[0] = termNode;
   1153 		termNode->antecedents[i] = tmpwriteDataNode;
   1154 		termNode->antType[i] = rf_control;
   1155 		tmpwriteDataNode = tmpwriteDataNode->list_next;
   1156 	}
   1157 
   1158 	tmpwriteParityNode = writeParityNodes;
   1159 	for (i = 0; i < numParityNodes; i++) {
   1160 		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
   1161 		tmpwriteParityNode->succedents[0] = termNode;
   1162 		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
   1163 		termNode->antType[numDataNodes + i] = rf_control;
   1164 		tmpwriteParityNode = tmpwriteParityNode->list_next;
   1165 	}
   1166 
   1167 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1168 	if (nfaults == 2) {
   1169 		tmpwriteQNode = writeQNodes;
   1170 		for (i = 0; i < numParityNodes; i++) {
   1171 			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
   1172 			tmpwriteQNode->succedents[0] = termNode;
   1173 			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
   1174 			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
   1175 			tmpwriteQNode = tmpwriteQNode->list_next;
   1176 		}
   1177 	}
   1178 #endif
   1179 }
   1180 
   1181 
   1182 /******************************************************************************
   1183  * create a write graph (fault-free or degraded) for RAID level 1
   1184  *
   1185  * Hdr -> Commit -> Wpd -> Nil -> Trm
   1186  *               -> Wsd ->
   1187  *
   1188  * The "Wpd" node writes data to the primary copy in the mirror pair
   1189  * The "Wsd" node writes data to the secondary copy in the mirror pair
   1190  *
   1191  * Parameters:  raidPtr   - description of the physical array
   1192  *              asmap     - logical & physical addresses for this access
   1193  *              bp        - buffer ptr (holds write data)
   1194  *              flags     - general flags (e.g. disk locking)
   1195  *              allocList - list of memory allocated in DAG creation
   1196  *****************************************************************************/
   1197 
   1198 void
   1199 rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
   1200 			 RF_DagHeader_t *dag_h, void *bp,
   1201 			 RF_RaidAccessFlags_t flags,
   1202 			 RF_AllocListElem_t *allocList)
   1203 {
   1204 	RF_DagNode_t *unblockNode, *termNode, *commitNode;
   1205 	RF_DagNode_t *wndNode, *wmirNode;
   1206 	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
   1207 	int     nWndNodes, nWmirNodes, i;
   1208 	RF_ReconUnitNum_t which_ru;
   1209 	RF_PhysDiskAddr_t *pda, *pdaP;
   1210 	RF_StripeNum_t parityStripeID;
   1211 
   1212 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
   1213 	    asmap->raidAddress, &which_ru);
   1214 #if RF_DEBUG_DAG
   1215 	if (rf_dagDebug) {
   1216 		printf("[Creating RAID level 1 write DAG]\n");
   1217 	}
   1218 #endif
   1219 	dag_h->creator = "RaidOneWriteDAG";
   1220 
   1221 	/* 2 implies access not SU aligned */
   1222 	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
   1223 	nWndNodes = (asmap->physInfo->next) ? 2 : 1;
   1224 
   1225 	/* alloc the Wnd nodes and the Wmir node */
   1226 	if (asmap->numDataFailed == 1)
   1227 		nWndNodes--;
   1228 	if (asmap->numParityFailed == 1)
   1229 		nWmirNodes--;
   1230 
   1231 	/* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
   1232 	 * + terminator) */
   1233 	for (i = 0; i < nWndNodes; i++) {
   1234 		tmpNode = rf_AllocDAGNode();
   1235 		tmpNode->list_next = dag_h->nodes;
   1236 		dag_h->nodes = tmpNode;
   1237 	}
   1238 	wndNode = dag_h->nodes;
   1239 
   1240 	for (i = 0; i < nWmirNodes; i++) {
   1241 		tmpNode = rf_AllocDAGNode();
   1242 		tmpNode->list_next = dag_h->nodes;
   1243 		dag_h->nodes = tmpNode;
   1244 	}
   1245 	wmirNode = dag_h->nodes;
   1246 
   1247 	commitNode = rf_AllocDAGNode();
   1248 	commitNode->list_next = dag_h->nodes;
   1249 	dag_h->nodes = commitNode;
   1250 
   1251 	unblockNode = rf_AllocDAGNode();
   1252 	unblockNode->list_next = dag_h->nodes;
   1253 	dag_h->nodes = unblockNode;
   1254 
   1255 	termNode = rf_AllocDAGNode();
   1256 	termNode->list_next = dag_h->nodes;
   1257 	dag_h->nodes = termNode;
   1258 
   1259 	/* this dag can commit immediately */
   1260 	dag_h->numCommitNodes = 1;
   1261 	dag_h->numCommits = 0;
   1262 	dag_h->numSuccedents = 1;
   1263 
   1264 	/* initialize the commit, unblock, and term nodes */
   1265 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
   1266 		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
   1267 		    0, 0, 0, dag_h, "Cmt", allocList);
   1268 	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
   1269 		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
   1270 		    0, 0, dag_h, "Nil", allocList);
   1271 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
   1272 		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
   1273 		    dag_h, "Trm", allocList);
   1274 
   1275 	/* initialize the wnd nodes */
   1276 	if (nWndNodes > 0) {
   1277 		pda = asmap->physInfo;
   1278 		tmpwndNode = wndNode;
   1279 		for (i = 0; i < nWndNodes; i++) {
   1280 			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
   1281 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
   1282 				    rf_GenericWakeupFunc, 1, 1, 4, 0,
   1283 				    dag_h, "Wpd", allocList);
   1284 			RF_ASSERT(pda != NULL);
   1285 			tmpwndNode->params[0].p = pda;
   1286 			tmpwndNode->params[1].p = pda->bufPtr;
   1287 			tmpwndNode->params[2].v = parityStripeID;
   1288 			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
   1289 			pda = pda->next;
   1290 			tmpwndNode = tmpwndNode->list_next;
   1291 		}
   1292 		RF_ASSERT(pda == NULL);
   1293 	}
   1294 	/* initialize the mirror nodes */
   1295 	if (nWmirNodes > 0) {
   1296 		pda = asmap->physInfo;
   1297 		pdaP = asmap->parityInfo;
   1298 		tmpwmirNode = wmirNode;
   1299 		for (i = 0; i < nWmirNodes; i++) {
   1300 			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
   1301 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
   1302 				    rf_GenericWakeupFunc, 1, 1, 4, 0,
   1303 				    dag_h, "Wsd", allocList);
   1304 			RF_ASSERT(pda != NULL);
   1305 			tmpwmirNode->params[0].p = pdaP;
   1306 			tmpwmirNode->params[1].p = pda->bufPtr;
   1307 			tmpwmirNode->params[2].v = parityStripeID;
   1308 			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
   1309 			pda = pda->next;
   1310 			pdaP = pdaP->next;
   1311 			tmpwmirNode = tmpwmirNode->list_next;
   1312 		}
   1313 		RF_ASSERT(pda == NULL);
   1314 		RF_ASSERT(pdaP == NULL);
   1315 	}
   1316 	/* link the header node to the commit node */
   1317 	RF_ASSERT(dag_h->numSuccedents == 1);
   1318 	RF_ASSERT(commitNode->numAntecedents == 0);
   1319 	dag_h->succedents[0] = commitNode;
   1320 
   1321 	/* link the commit node to the write nodes */
   1322 	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
   1323 	tmpwndNode = wndNode;
   1324 	for (i = 0; i < nWndNodes; i++) {
   1325 		RF_ASSERT(tmpwndNode->numAntecedents == 1);
   1326 		commitNode->succedents[i] = tmpwndNode;
   1327 		tmpwndNode->antecedents[0] = commitNode;
   1328 		tmpwndNode->antType[0] = rf_control;
   1329 		tmpwndNode = tmpwndNode->list_next;
   1330 	}
   1331 	tmpwmirNode = wmirNode;
   1332 	for (i = 0; i < nWmirNodes; i++) {
   1333 		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
   1334 		commitNode->succedents[i + nWndNodes] = tmpwmirNode;
   1335 		tmpwmirNode->antecedents[0] = commitNode;
   1336 		tmpwmirNode->antType[0] = rf_control;
   1337 		tmpwmirNode = tmpwmirNode->list_next;
   1338 	}
   1339 
   1340 	/* link the write nodes to the unblock node */
   1341 	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
   1342 	tmpwndNode = wndNode;
   1343 	for (i = 0; i < nWndNodes; i++) {
   1344 		RF_ASSERT(tmpwndNode->numSuccedents == 1);
   1345 		tmpwndNode->succedents[0] = unblockNode;
   1346 		unblockNode->antecedents[i] = tmpwndNode;
   1347 		unblockNode->antType[i] = rf_control;
   1348 		tmpwndNode = tmpwndNode->list_next;
   1349 	}
   1350 	tmpwmirNode = wmirNode;
   1351 	for (i = 0; i < nWmirNodes; i++) {
   1352 		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
   1353 		tmpwmirNode->succedents[0] = unblockNode;
   1354 		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
   1355 		unblockNode->antType[i + nWndNodes] = rf_control;
   1356 		tmpwmirNode = tmpwmirNode->list_next;
   1357 	}
   1358 
   1359 	/* link the unblock node to the term node */
   1360 	RF_ASSERT(unblockNode->numSuccedents == 1);
   1361 	RF_ASSERT(termNode->numAntecedents == 1);
   1362 	RF_ASSERT(termNode->numSuccedents == 0);
   1363 	unblockNode->succedents[0] = termNode;
   1364 	termNode->antecedents[0] = unblockNode;
   1365 	termNode->antType[0] = rf_control;
   1366 }
   1367