Home | History | Annotate | Line # | Download | only in raidframe
rf_dagffwr.c revision 1.11.2.6
      1 /*	$NetBSD: rf_dagffwr.c,v 1.11.2.6 2005/11/10 14:07:40 skrll Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * rf_dagffwr.c
     31  *
     32  * code for creating fault-free write DAGs
     33  *
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.11.2.6 2005/11/10 14:07:40 skrll Exp $");
     38 
     39 #include <dev/raidframe/raidframevar.h>
     40 
     41 #include "rf_raid.h"
     42 #include "rf_dag.h"
     43 #include "rf_dagutils.h"
     44 #include "rf_dagfuncs.h"
     45 #include "rf_debugMem.h"
     46 #include "rf_dagffrd.h"
     47 #include "rf_general.h"
     48 #include "rf_dagffwr.h"
     49 #include "rf_map.h"
     50 
     51 /******************************************************************************
     52  *
     53  * General comments on DAG creation:
     54  *
     55  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
     56  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
     57  * is reached, the execution engine will halt forward execution and work
     58  * backward through the graph, executing the undo functions.  Assuming that
     59  * each node in the graph prior to the Cmt node is undoable and atomic - or -
     60  * does not make changes to permanent state, the graph will fail atomically.
     61  * If an error occurs after the Cmt node executes, the engine will roll-forward
     62  * through the graph, blindly executing nodes until it reaches the end.
     63  * If a graph reaches the end, it is assumed to have completed successfully.
     64  *
     65  * A graph has only 1 Cmt node.
     66  *
     67  */
     68 
     69 
     70 /******************************************************************************
     71  *
     72  * The following wrappers map the standard DAG creation interface to the
     73  * DAG creation routines.  Additionally, these wrappers enable experimentation
     74  * with new DAG structures by providing an extra level of indirection, allowing
     75  * the DAG creation routines to be replaced at this single point.
     76  */
     77 
     78 
     79 void
     80 rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
     81 			      RF_DagHeader_t *dag_h, void *bp,
     82 			      RF_RaidAccessFlags_t flags,
     83 			      RF_AllocListElem_t *allocList,
     84 			      RF_IoType_t type)
     85 {
     86 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
     87 				 RF_IO_TYPE_WRITE);
     88 }
     89 
     90 void
     91 rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
     92 		       RF_DagHeader_t *dag_h, void *bp,
     93 		       RF_RaidAccessFlags_t flags,
     94 		       RF_AllocListElem_t *allocList,
     95 		       RF_IoType_t type)
     96 {
     97 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
     98 				 RF_IO_TYPE_WRITE);
     99 }
    100 
    101 void
    102 rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    103 		       RF_DagHeader_t *dag_h, void *bp,
    104 		       RF_RaidAccessFlags_t flags,
    105 		       RF_AllocListElem_t *allocList)
    106 {
    107 	/* "normal" rollaway */
    108 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
    109 				     allocList, &rf_xorFuncs, NULL);
    110 }
    111 
    112 void
    113 rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    114 		       RF_DagHeader_t *dag_h, void *bp,
    115 		       RF_RaidAccessFlags_t flags,
    116 		       RF_AllocListElem_t *allocList)
    117 {
    118 	/* "normal" rollaway */
    119 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
    120 				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
    121 }
    122 
    123 
    124 /******************************************************************************
    125  *
    126  * DAG creation code begins here
    127  */
    128 
    129 
    130 /******************************************************************************
    131  *
    132  * creates a DAG to perform a large-write operation:
    133  *
    134  *           / Rod \           / Wnd \
    135  * H -- block- Rod - Xor - Cmt - Wnd --- T
    136  *           \ Rod /          \  Wnp /
    137  *                             \[Wnq]/
    138  *
    139  * The XOR node also does the Q calculation in the P+Q architecture.
    140  * All nodes are before the commit node (Cmt) are assumed to be atomic and
    141  * undoable - or - they make no changes to permanent state.
    142  *
    143  * Rod = read old data
    144  * Cmt = commit node
    145  * Wnp = write new parity
    146  * Wnd = write new data
    147  * Wnq = write new "q"
    148  * [] denotes optional segments in the graph
    149  *
    150  * Parameters:  raidPtr   - description of the physical array
    151  *              asmap     - logical & physical addresses for this access
    152  *              bp        - buffer ptr (holds write data)
    153  *              flags     - general flags (e.g. disk locking)
    154  *              allocList - list of memory allocated in DAG creation
    155  *              nfaults   - number of faults array can tolerate
    156  *                          (equal to # redundancy units in stripe)
    157  *              redfuncs  - list of redundancy generating functions
    158  *
    159  *****************************************************************************/
    160 
    161 void
    162 rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    163 			     RF_DagHeader_t *dag_h, void *bp,
    164 			     RF_RaidAccessFlags_t flags,
    165 			     RF_AllocListElem_t *allocList,
    166 			     int nfaults, int (*redFunc) (RF_DagNode_t *),
    167 			     int allowBufferRecycle)
    168 {
    169 	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
    170 	RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
    171 	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
    172 	RF_AccessStripeMapHeader_t *new_asm_h[2];
    173 	RF_StripeNum_t parityStripeID;
    174 	char   *sosBuffer, *eosBuffer;
    175 	RF_ReconUnitNum_t which_ru;
    176 	RF_RaidLayout_t *layoutPtr;
    177 	RF_PhysDiskAddr_t *pda;
    178 
    179 	layoutPtr = &(raidPtr->Layout);
    180 	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
    181 							asmap->raidAddress,
    182 							&which_ru);
    183 
    184 #if RF_DEBUG_DAG
    185 	if (rf_dagDebug) {
    186 		printf("[Creating large-write DAG]\n");
    187 	}
    188 #endif
    189 	dag_h->creator = "LargeWriteDAG";
    190 
    191 	dag_h->numCommitNodes = 1;
    192 	dag_h->numCommits = 0;
    193 	dag_h->numSuccedents = 1;
    194 
    195 	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
    196 	nWndNodes = asmap->numStripeUnitsAccessed;
    197 
    198 	for (i = 0; i < nWndNodes; i++) {
    199 		tmpNode = rf_AllocDAGNode();
    200 		tmpNode->list_next = dag_h->nodes;
    201 		dag_h->nodes = tmpNode;
    202 	}
    203 	wndNodes = dag_h->nodes;
    204 
    205 	xorNode = rf_AllocDAGNode();
    206 	xorNode->list_next = dag_h->nodes;
    207 	dag_h->nodes = xorNode;
    208 
    209 	wnpNode = rf_AllocDAGNode();
    210 	wnpNode->list_next = dag_h->nodes;
    211 	dag_h->nodes = wnpNode;
    212 
    213 	blockNode = rf_AllocDAGNode();
    214 	blockNode->list_next = dag_h->nodes;
    215 	dag_h->nodes = blockNode;
    216 
    217 	commitNode = rf_AllocDAGNode();
    218 	commitNode->list_next = dag_h->nodes;
    219 	dag_h->nodes = commitNode;
    220 
    221 	termNode = rf_AllocDAGNode();
    222 	termNode->list_next = dag_h->nodes;
    223 	dag_h->nodes = termNode;
    224 
    225 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    226 	if (nfaults == 2) {
    227 		wnqNode = rf_AllocDAGNode();
    228 	} else {
    229 #endif
    230 		wnqNode = NULL;
    231 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    232 	}
    233 #endif
    234 	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
    235 					new_asm_h, &nRodNodes, &sosBuffer,
    236 					&eosBuffer, allocList);
    237 	if (nRodNodes > 0) {
    238 		for (i = 0; i < nRodNodes; i++) {
    239 			tmpNode = rf_AllocDAGNode();
    240 			tmpNode->list_next = dag_h->nodes;
    241 			dag_h->nodes = tmpNode;
    242 		}
    243 		rodNodes = dag_h->nodes;
    244 	} else {
    245 		rodNodes = NULL;
    246 	}
    247 
    248 	/* begin node initialization */
    249 	if (nRodNodes > 0) {
    250 		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    251 			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
    252 			    dag_h, "Nil", allocList);
    253 	} else {
    254 		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    255 			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
    256 			    dag_h, "Nil", allocList);
    257 	}
    258 
    259 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
    260 		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
    261 		    dag_h, "Cmt", allocList);
    262 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
    263 		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
    264 		    dag_h, "Trm", allocList);
    265 
    266 	/* initialize the Rod nodes */
    267 	tmpNode = rodNodes;
    268 	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
    269 		if (new_asm_h[asmNum]) {
    270 			pda = new_asm_h[asmNum]->stripeMap->physInfo;
    271 			while (pda) {
    272 				rf_InitNode(tmpNode, rf_wait,
    273 					    RF_FALSE, rf_DiskReadFunc,
    274 					    rf_DiskReadUndoFunc,
    275 					    rf_GenericWakeupFunc,
    276 					    1, 1, 4, 0, dag_h,
    277 					    "Rod", allocList);
    278 				tmpNode->params[0].p = pda;
    279 				tmpNode->params[1].p = pda->bufPtr;
    280 				tmpNode->params[2].v = parityStripeID;
    281 				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    282 				    which_ru);
    283 				nodeNum++;
    284 				pda = pda->next;
    285 				tmpNode = tmpNode->list_next;
    286 			}
    287 		}
    288 	}
    289 	RF_ASSERT(nodeNum == nRodNodes);
    290 
    291 	/* initialize the wnd nodes */
    292 	pda = asmap->physInfo;
    293 	tmpNode = wndNodes;
    294 	for (i = 0; i < nWndNodes; i++) {
    295 		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
    296 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    297 			    rf_GenericWakeupFunc, 1, 1, 4, 0,
    298 			    dag_h, "Wnd", allocList);
    299 		RF_ASSERT(pda != NULL);
    300 		tmpNode->params[0].p = pda;
    301 		tmpNode->params[1].p = pda->bufPtr;
    302 		tmpNode->params[2].v = parityStripeID;
    303 		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    304 		pda = pda->next;
    305 		tmpNode = tmpNode->list_next;
    306 	}
    307 
    308 	/* initialize the redundancy node */
    309 	if (nRodNodes > 0) {
    310 		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
    311 			    rf_NullNodeUndoFunc, NULL, 1,
    312 			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
    313 			    nfaults, dag_h, "Xr ", allocList);
    314 	} else {
    315 		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
    316 			    rf_NullNodeUndoFunc, NULL, 1,
    317 			    1, 2 * (nWndNodes + nRodNodes) + 1,
    318 			    nfaults, dag_h, "Xr ", allocList);
    319 	}
    320 	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
    321 	tmpNode = wndNodes;
    322 	for (i = 0; i < nWndNodes; i++) {
    323 		/* pda */
    324 		xorNode->params[2 * i + 0] = tmpNode->params[0];
    325 		/* buf ptr */
    326 		xorNode->params[2 * i + 1] = tmpNode->params[1];
    327 		tmpNode = tmpNode->list_next;
    328 	}
    329 	tmpNode = rodNodes;
    330 	for (i = 0; i < nRodNodes; i++) {
    331 		/* pda */
    332 		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
    333 		/* buf ptr */
    334 		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
    335 		tmpNode = tmpNode->list_next;
    336 	}
    337 	/* xor node needs to get at RAID information */
    338 	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
    339 
    340 	/*
    341          * Look for an Rod node that reads a complete SU. If none,
    342          * alloc a buffer to receive the parity info. Note that we
    343          * can't use a new data buffer because it will not have gotten
    344          * written when the xor occurs.  */
    345 	if (allowBufferRecycle) {
    346 		tmpNode = rodNodes;
    347 		for (i = 0; i < nRodNodes; i++) {
    348 			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
    349 				break;
    350 			tmpNode = tmpNode->list_next;
    351 		}
    352 	}
    353 	if ((!allowBufferRecycle) || (i == nRodNodes)) {
    354 		xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
    355 	} else {
    356 		/* this works because the only way we get here is if
    357 		   allowBufferRecycle is true and we went through the
    358 		   above for loop, and exited via the break before
    359 		   i==nRodNodes was true.  That means tmpNode will
    360 		   still point to a valid node -- the one we want for
    361 		   here! */
    362 		xorNode->results[0] = tmpNode->params[1].p;
    363 	}
    364 
    365 	/* initialize the Wnp node */
    366 	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
    367 		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
    368 		    dag_h, "Wnp", allocList);
    369 	wnpNode->params[0].p = asmap->parityInfo;
    370 	wnpNode->params[1].p = xorNode->results[0];
    371 	wnpNode->params[2].v = parityStripeID;
    372 	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    373 	/* parityInfo must describe entire parity unit */
    374 	RF_ASSERT(asmap->parityInfo->next == NULL);
    375 
    376 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    377 	if (nfaults == 2) {
    378 		/*
    379 	         * We never try to recycle a buffer for the Q calcuation
    380 	         * in addition to the parity. This would cause two buffers
    381 	         * to get smashed during the P and Q calculation, guaranteeing
    382 	         * one would be wrong.
    383 	         */
    384 		RF_MallocAndAdd(xorNode->results[1],
    385 				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
    386 				(void *), allocList);
    387 		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
    388 			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
    389 			    1, 1, 4, 0, dag_h, "Wnq", allocList);
    390 		wnqNode->params[0].p = asmap->qInfo;
    391 		wnqNode->params[1].p = xorNode->results[1];
    392 		wnqNode->params[2].v = parityStripeID;
    393 		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    394 		/* parityInfo must describe entire parity unit */
    395 		RF_ASSERT(asmap->parityInfo->next == NULL);
    396 	}
    397 #endif
    398 	/*
    399          * Connect nodes to form graph.
    400          */
    401 
    402 	/* connect dag header to block node */
    403 	RF_ASSERT(blockNode->numAntecedents == 0);
    404 	dag_h->succedents[0] = blockNode;
    405 
    406 	if (nRodNodes > 0) {
    407 		/* connect the block node to the Rod nodes */
    408 		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
    409 		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
    410 		tmpNode = rodNodes;
    411 		for (i = 0; i < nRodNodes; i++) {
    412 			RF_ASSERT(tmpNode->numAntecedents == 1);
    413 			blockNode->succedents[i] = tmpNode;
    414 			tmpNode->antecedents[0] = blockNode;
    415 			tmpNode->antType[0] = rf_control;
    416 
    417 			/* connect the Rod nodes to the Xor node */
    418 			RF_ASSERT(tmpNode->numSuccedents == 1);
    419 			tmpNode->succedents[0] = xorNode;
    420 			xorNode->antecedents[i] = tmpNode;
    421 			xorNode->antType[i] = rf_trueData;
    422 			tmpNode = tmpNode->list_next;
    423 		}
    424 	} else {
    425 		/* connect the block node to the Xor node */
    426 		RF_ASSERT(blockNode->numSuccedents == 1);
    427 		RF_ASSERT(xorNode->numAntecedents == 1);
    428 		blockNode->succedents[0] = xorNode;
    429 		xorNode->antecedents[0] = blockNode;
    430 		xorNode->antType[0] = rf_control;
    431 	}
    432 
    433 	/* connect the xor node to the commit node */
    434 	RF_ASSERT(xorNode->numSuccedents == 1);
    435 	RF_ASSERT(commitNode->numAntecedents == 1);
    436 	xorNode->succedents[0] = commitNode;
    437 	commitNode->antecedents[0] = xorNode;
    438 	commitNode->antType[0] = rf_control;
    439 
    440 	/* connect the commit node to the write nodes */
    441 	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
    442 	tmpNode = wndNodes;
    443 	for (i = 0; i < nWndNodes; i++) {
    444 		RF_ASSERT(wndNodes->numAntecedents == 1);
    445 		commitNode->succedents[i] = tmpNode;
    446 		tmpNode->antecedents[0] = commitNode;
    447 		tmpNode->antType[0] = rf_control;
    448 		tmpNode = tmpNode->list_next;
    449 	}
    450 	RF_ASSERT(wnpNode->numAntecedents == 1);
    451 	commitNode->succedents[nWndNodes] = wnpNode;
    452 	wnpNode->antecedents[0] = commitNode;
    453 	wnpNode->antType[0] = rf_trueData;
    454 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    455 	if (nfaults == 2) {
    456 		RF_ASSERT(wnqNode->numAntecedents == 1);
    457 		commitNode->succedents[nWndNodes + 1] = wnqNode;
    458 		wnqNode->antecedents[0] = commitNode;
    459 		wnqNode->antType[0] = rf_trueData;
    460 	}
    461 #endif
    462 	/* connect the write nodes to the term node */
    463 	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
    464 	RF_ASSERT(termNode->numSuccedents == 0);
    465 	tmpNode = wndNodes;
    466 	for (i = 0; i < nWndNodes; i++) {
    467 		RF_ASSERT(wndNodes->numSuccedents == 1);
    468 		tmpNode->succedents[0] = termNode;
    469 		termNode->antecedents[i] = tmpNode;
    470 		termNode->antType[i] = rf_control;
    471 		tmpNode = tmpNode->list_next;
    472 	}
    473 	RF_ASSERT(wnpNode->numSuccedents == 1);
    474 	wnpNode->succedents[0] = termNode;
    475 	termNode->antecedents[nWndNodes] = wnpNode;
    476 	termNode->antType[nWndNodes] = rf_control;
    477 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    478 	if (nfaults == 2) {
    479 		RF_ASSERT(wnqNode->numSuccedents == 1);
    480 		wnqNode->succedents[0] = termNode;
    481 		termNode->antecedents[nWndNodes + 1] = wnqNode;
    482 		termNode->antType[nWndNodes + 1] = rf_control;
    483 	}
    484 #endif
    485 }
    486 /******************************************************************************
    487  *
    488  * creates a DAG to perform a small-write operation (either raid 5 or pq),
    489  * which is as follows:
    490  *
    491  * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
    492  *            \- Rod X      /     \----> Wnd [Und]-/
    493  *           [\- Rod X     /       \---> Wnd [Und]-/]
    494  *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
    495  *
    496  * Rop = read old parity
    497  * Rod = read old data
    498  * Roq = read old "q"
    499  * Cmt = commit node
    500  * Und = unlock data disk
    501  * Unp = unlock parity disk
    502  * Unq = unlock q disk
    503  * Wnp = write new parity
    504  * Wnd = write new data
    505  * Wnq = write new "q"
    506  * [ ] denotes optional segments in the graph
    507  *
    508  * Parameters:  raidPtr   - description of the physical array
    509  *              asmap     - logical & physical addresses for this access
    510  *              bp        - buffer ptr (holds write data)
    511  *              flags     - general flags (e.g. disk locking)
    512  *              allocList - list of memory allocated in DAG creation
    513  *              pfuncs    - list of parity generating functions
    514  *              qfuncs    - list of q generating functions
    515  *
    516  * A NULL qfuncs indicates a single-fault-tolerant array (no Q nodes)
    517  *****************************************************************************/
    518 
    519 void
    520 rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    521 			     RF_DagHeader_t *dag_h, void *bp,
    522 			     RF_RaidAccessFlags_t flags,
    523 			     RF_AllocListElem_t *allocList,
    524 			     const RF_RedFuncs_t *pfuncs,
    525 			     const RF_RedFuncs_t *qfuncs)
    526 {
    527 	RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
    528 	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
    529 	RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
    530 	RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
    531 	RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
    532 	RF_DagNode_t *tmpwriteParityNode;
    533 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    534 	RF_DagNode_t *tmpwriteQNode;
    535 #endif
    536 	int     i, j, nNodes, totalNumNodes;
    537 	RF_ReconUnitNum_t which_ru;
    538 	int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
    539 	int     (*qfunc) (RF_DagNode_t *);
    540 	int     numDataNodes, numParityNodes;
    541 	RF_StripeNum_t parityStripeID;
    542 	RF_PhysDiskAddr_t *pda;
    543 	const char *name, *qname;
    544 	long    nfaults;
    545 
    546 	nfaults = qfuncs ? 2 : 1;
    547 
    548 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
    549 	    asmap->raidAddress, &which_ru);
    550 	pda = asmap->physInfo;
    551 	numDataNodes = asmap->numStripeUnitsAccessed;
    552 	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
    553 
    554 #if RF_DEBUG_DAG
    555 	if (rf_dagDebug) {
    556 		printf("[Creating small-write DAG]\n");
    557 	}
    558 #endif
    559 	RF_ASSERT(numDataNodes > 0);
    560 	dag_h->creator = "SmallWriteDAG";
    561 
    562 	dag_h->numCommitNodes = 1;
    563 	dag_h->numCommits = 0;
    564 	dag_h->numSuccedents = 1;
    565 
    566 	/*
    567          * DAG creation occurs in four steps:
    568          * 1. count the number of nodes in the DAG
    569          * 2. create the nodes
    570          * 3. initialize the nodes
    571          * 4. connect the nodes
    572          */
    573 
    574 	/*
    575          * Step 1. compute number of nodes in the graph
    576          */
    577 
    578 	/* number of nodes: a read and write for each data unit a
    579 	 * redundancy computation node for each parity node (nfaults *
    580 	 * nparity) a read and write for each parity unit a block and
    581 	 * commit node (2) a terminate node if atomic RMW an unlock
    582 	 * node for each data unit, redundancy unit */
    583 	totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
    584 	    + (nfaults * 2 * numParityNodes) + 3;
    585 	/*
    586          * Step 2. create the nodes
    587          */
    588 
    589 	blockNode = rf_AllocDAGNode();
    590 	blockNode->list_next = dag_h->nodes;
    591 	dag_h->nodes = blockNode;
    592 
    593 	commitNode = rf_AllocDAGNode();
    594 	commitNode->list_next = dag_h->nodes;
    595 	dag_h->nodes = commitNode;
    596 
    597 	for (i = 0; i < numDataNodes; i++) {
    598 		tmpNode = rf_AllocDAGNode();
    599 		tmpNode->list_next = dag_h->nodes;
    600 		dag_h->nodes = tmpNode;
    601 	}
    602 	readDataNodes = dag_h->nodes;
    603 
    604 	for (i = 0; i < numParityNodes; i++) {
    605 		tmpNode = rf_AllocDAGNode();
    606 		tmpNode->list_next = dag_h->nodes;
    607 		dag_h->nodes = tmpNode;
    608 	}
    609 	readParityNodes = dag_h->nodes;
    610 
    611 	for (i = 0; i < numDataNodes; i++) {
    612 		tmpNode = rf_AllocDAGNode();
    613 		tmpNode->list_next = dag_h->nodes;
    614 		dag_h->nodes = tmpNode;
    615 	}
    616 	writeDataNodes = dag_h->nodes;
    617 
    618 	for (i = 0; i < numParityNodes; i++) {
    619 		tmpNode = rf_AllocDAGNode();
    620 		tmpNode->list_next = dag_h->nodes;
    621 		dag_h->nodes = tmpNode;
    622 	}
    623 	writeParityNodes = dag_h->nodes;
    624 
    625 	for (i = 0; i < numParityNodes; i++) {
    626 		tmpNode = rf_AllocDAGNode();
    627 		tmpNode->list_next = dag_h->nodes;
    628 		dag_h->nodes = tmpNode;
    629 	}
    630 	xorNodes = dag_h->nodes;
    631 
    632 	termNode = rf_AllocDAGNode();
    633 	termNode->list_next = dag_h->nodes;
    634 	dag_h->nodes = termNode;
    635 
    636 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    637 	if (nfaults == 2) {
    638 		for (i = 0; i < numParityNodes; i++) {
    639 			tmpNode = rf_AllocDAGNode();
    640 			tmpNode->list_next = dag_h->nodes;
    641 			dag_h->nodes = tmpNode;
    642 		}
    643 		readQNodes = dag_h->nodes;
    644 
    645 		for (i = 0; i < numParityNodes; i++) {
    646 			tmpNode = rf_AllocDAGNode();
    647 			tmpNode->list_next = dag_h->nodes;
    648 			dag_h->nodes = tmpNode;
    649 		}
    650 		writeQNodes = dag_h->nodes;
    651 
    652 		for (i = 0; i < numParityNodes; i++) {
    653 			tmpNode = rf_AllocDAGNode();
    654 			tmpNode->list_next = dag_h->nodes;
    655 			dag_h->nodes = tmpNode;
    656 		}
    657 		qNodes = dag_h->nodes;
    658 	} else {
    659 #endif
    660 		readQNodes = writeQNodes = qNodes = NULL;
    661 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    662 	}
    663 #endif
    664 
    665 	/*
    666          * Step 3. initialize the nodes
    667          */
    668 	/* initialize block node (Nil) */
    669 	nNodes = numDataNodes + (nfaults * numParityNodes);
    670 	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    671 		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
    672 		    dag_h, "Nil", allocList);
    673 
    674 	/* initialize commit node (Cmt) */
    675 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
    676 		    rf_NullNodeUndoFunc, NULL, nNodes,
    677 		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
    678 
    679 	/* initialize terminate node (Trm) */
    680 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
    681 		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
    682 		    dag_h, "Trm", allocList);
    683 
    684 	/* initialize nodes which read old data (Rod) */
    685 	tmpreadDataNode = readDataNodes;
    686 	for (i = 0; i < numDataNodes; i++) {
    687 		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
    688 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    689 			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
    690 			    1, 4, 0, dag_h, "Rod", allocList);
    691 		RF_ASSERT(pda != NULL);
    692 		/* physical disk addr desc */
    693 		tmpreadDataNode->params[0].p = pda;
    694 		/* buffer to hold old data */
    695 		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
    696 		tmpreadDataNode->params[2].v = parityStripeID;
    697 		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    698 		    which_ru);
    699 		pda = pda->next;
    700 		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
    701 			tmpreadDataNode->propList[j] = NULL;
    702 		}
    703 		tmpreadDataNode = tmpreadDataNode->list_next;
    704 	}
    705 
    706 	/* initialize nodes which read old parity (Rop) */
    707 	pda = asmap->parityInfo;
    708 	i = 0;
    709 	tmpreadParityNode = readParityNodes;
    710 	for (i = 0; i < numParityNodes; i++) {
    711 		RF_ASSERT(pda != NULL);
    712 		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
    713 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    714 			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
    715 			    dag_h, "Rop", allocList);
    716 		tmpreadParityNode->params[0].p = pda;
    717 		/* buffer to hold old parity */
    718 		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
    719 		tmpreadParityNode->params[2].v = parityStripeID;
    720 		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    721 		    which_ru);
    722 		pda = pda->next;
    723 		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
    724 			tmpreadParityNode->propList[0] = NULL;
    725 		}
    726 		tmpreadParityNode = tmpreadParityNode->list_next;
    727 	}
    728 
    729 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    730 	/* initialize nodes which read old Q (Roq) */
    731 	if (nfaults == 2) {
    732 		pda = asmap->qInfo;
    733 		tmpreadQNode = readQNodes;
    734 		for (i = 0; i < numParityNodes; i++) {
    735 			RF_ASSERT(pda != NULL);
    736 			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
    737 				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    738 				    rf_GenericWakeupFunc, numParityNodes,
    739 				    1, 4, 0, dag_h, "Roq", allocList);
    740 			tmpreadQNode->params[0].p = pda;
    741 			/* buffer to hold old Q */
    742 			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
    743 								   pda->numSector << raidPtr->logBytesPerSector);
    744 			tmpreadQNode->params[2].v = parityStripeID;
    745 			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    746 			    which_ru);
    747 			pda = pda->next;
    748 			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
    749 				tmpreadQNode->propList[0] = NULL;
    750 			}
    751 			tmpreadQNode = tmpreadQNode->list_next;
    752 		}
    753 	}
    754 #endif
    755 	/* initialize nodes which write new data (Wnd) */
    756 	pda = asmap->physInfo;
    757 	tmpwriteDataNode = writeDataNodes;
    758 	for (i = 0; i < numDataNodes; i++) {
    759 		RF_ASSERT(pda != NULL);
    760 		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
    761 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    762 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    763 			    "Wnd", allocList);
    764 		/* physical disk addr desc */
    765 		tmpwriteDataNode->params[0].p = pda;
    766 		/* buffer holding new data to be written */
    767 		tmpwriteDataNode->params[1].p = pda->bufPtr;
    768 		tmpwriteDataNode->params[2].v = parityStripeID;
    769 		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    770 		    which_ru);
    771 		pda = pda->next;
    772 		tmpwriteDataNode = tmpwriteDataNode->list_next;
    773 	}
    774 
    775 	/*
    776          * Initialize nodes which compute new parity and Q.
    777          */
    778 	/*
    779          * We use the simple XOR func in the double-XOR case, and when
    780          * we're accessing only a portion of one stripe unit. The
    781          * distinction between the two is that the regular XOR func
    782          * assumes that the targbuf is a full SU in size, and examines
    783          * the pda associated with the buffer to decide where within
    784          * the buffer to XOR the data, whereas the simple XOR func
    785          * just XORs the data into the start of the buffer.  */
    786 	if ((numParityNodes == 2) || ((numDataNodes == 1)
    787 		&& (asmap->totalSectorsAccessed <
    788 		    raidPtr->Layout.sectorsPerStripeUnit))) {
    789 		func = pfuncs->simple;
    790 		undoFunc = rf_NullNodeUndoFunc;
    791 		name = pfuncs->SimpleName;
    792 		if (qfuncs) {
    793 			qfunc = qfuncs->simple;
    794 			qname = qfuncs->SimpleName;
    795 		} else {
    796 			qfunc = NULL;
    797 			qname = NULL;
    798 		}
    799 	} else {
    800 		func = pfuncs->regular;
    801 		undoFunc = rf_NullNodeUndoFunc;
    802 		name = pfuncs->RegularName;
    803 		if (qfuncs) {
    804 			qfunc = qfuncs->regular;
    805 			qname = qfuncs->RegularName;
    806 		} else {
    807 			qfunc = NULL;
    808 			qname = NULL;
    809 		}
    810 	}
    811 	/*
    812          * Initialize the xor nodes: params are {pda,buf}
    813          * from {Rod,Wnd,Rop} nodes, and raidPtr
    814          */
    815 	if (numParityNodes == 2) {
    816 		/* double-xor case */
    817 		tmpxorNode = xorNodes;
    818 		tmpreadDataNode = readDataNodes;
    819 		tmpreadParityNode = readParityNodes;
    820 		tmpwriteDataNode = writeDataNodes;
    821 		tmpqNode = qNodes;
    822 		tmpreadQNode = readQNodes;
    823 		for (i = 0; i < numParityNodes; i++) {
    824 			/* note: no wakeup func for xor */
    825 			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
    826 				    undoFunc, NULL, 1,
    827 				    (numDataNodes + numParityNodes),
    828 				    7, 1, dag_h, name, allocList);
    829 			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
    830 			tmpxorNode->params[0] = tmpreadDataNode->params[0];
    831 			tmpxorNode->params[1] = tmpreadDataNode->params[1];
    832 			tmpxorNode->params[2] = tmpreadParityNode->params[0];
    833 			tmpxorNode->params[3] = tmpreadParityNode->params[1];
    834 			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
    835 			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
    836 			tmpxorNode->params[6].p = raidPtr;
    837 			/* use old parity buf as target buf */
    838 			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
    839 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    840 			if (nfaults == 2) {
    841 				/* note: no wakeup func for qor */
    842 				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
    843 					    qfunc, undoFunc, NULL, 1,
    844 					    (numDataNodes + numParityNodes),
    845 					    7, 1, dag_h, qname, allocList);
    846 				tmpqNode->params[0] = tmpreadDataNode->params[0];
    847 				tmpqNode->params[1] = tmpreadDataNode->params[1];
    848 				tmpqNode->params[2] = tmpreadQNode->.params[0];
    849 				tmpqNode->params[3] = tmpreadQNode->params[1];
    850 				tmpqNode->params[4] = tmpwriteDataNode->params[0];
    851 				tmpqNode->params[5] = tmpwriteDataNode->params[1];
    852 				tmpqNode->params[6].p = raidPtr;
    853 				/* use old Q buf as target buf */
    854 				tmpqNode->results[0] = tmpreadQNode->params[1].p;
    855 				tmpqNode = tmpqNode->list_next;
    856 				tmpreadQNodes = tmpreadQNodes->list_next;
    857 			}
    858 #endif
    859 			tmpxorNode = tmpxorNode->list_next;
    860 			tmpreadDataNode = tmpreadDataNode->list_next;
    861 			tmpreadParityNode = tmpreadParityNode->list_next;
    862 			tmpwriteDataNode = tmpwriteDataNode->list_next;
    863 		}
    864 	} else {
    865 		/* there is only one xor node in this case */
    866 		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
    867 			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
    868 			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
    869 			    dag_h, name, allocList);
    870 		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
    871 		tmpreadDataNode = readDataNodes;
    872 		for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
    873 							out the "+1" into the "deal with Rop separately below */
    874 			/* set up params related to Rod nodes */
    875 			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
    876 			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
    877 			tmpreadDataNode = tmpreadDataNode->list_next;
    878 		}
    879 		/* deal with Rop separately */
    880 		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
    881 		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
    882 
    883 		tmpwriteDataNode = writeDataNodes;
    884 		for (i = 0; i < numDataNodes; i++) {
    885 			/* set up params related to Wnd and Wnp nodes */
    886 			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
    887 			    tmpwriteDataNode->params[0];
    888 			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
    889 			    tmpwriteDataNode->params[1];
    890 			tmpwriteDataNode = tmpwriteDataNode->list_next;
    891 		}
    892 		/* xor node needs to get at RAID information */
    893 		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
    894 		xorNodes->results[0] = readParityNodes->params[1].p;
    895 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    896 		if (nfaults == 2) {
    897 			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
    898 				    undoFunc, NULL, 1,
    899 				    (numDataNodes + numParityNodes),
    900 				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
    901 				    dag_h, qname, allocList);
    902 			tmpreadDataNode = readDataNodes;
    903 			for (i = 0; i < numDataNodes; i++) {
    904 				/* set up params related to Rod */
    905 				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
    906 				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
    907 				tmpreadDataNode = tmpreadDataNode->list_next;
    908 			}
    909 			/* and read old q */
    910 			qNodes->params[2 * numDataNodes + 0] =	/* pda */
    911 			    readQNodes->params[0];
    912 			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
    913 			    readQNodes->params[1];
    914 			tmpwriteDataNode = writeDataNodes;
    915 			for (i = 0; i < numDataNodes; i++) {
    916 				/* set up params related to Wnd nodes */
    917 				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
    918 				    tmpwriteDataNode->params[0];
    919 				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
    920 				    tmpwriteDataNode->params[1];
    921 				tmpwriteDataNode = tmpwriteDataNode->list_next;
    922 			}
    923 			/* xor node needs to get at RAID information */
    924 			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
    925 			qNodes->results[0] = readQNodes->params[1].p;
    926 		}
    927 #endif
    928 	}
    929 
    930 	/* initialize nodes which write new parity (Wnp) */
    931 	pda = asmap->parityInfo;
    932 	tmpwriteParityNode = writeParityNodes;
    933 	tmpxorNode = xorNodes;
    934 	for (i = 0; i < numParityNodes; i++) {
    935 		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
    936 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    937 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    938 			    "Wnp", allocList);
    939 		RF_ASSERT(pda != NULL);
    940 		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
    941 				  			 * filled in by xor node */
    942 		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
    943 				  						 * parity write
    944 				  						 * operation */
    945 		tmpwriteParityNode->params[2].v = parityStripeID;
    946 		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    947 		    which_ru);
    948 		pda = pda->next;
    949 		tmpwriteParityNode = tmpwriteParityNode->list_next;
    950 		tmpxorNode = tmpxorNode->list_next;
    951 	}
    952 
    953 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    954 	/* initialize nodes which write new Q (Wnq) */
    955 	if (nfaults == 2) {
    956 		pda = asmap->qInfo;
    957 		tmpwriteQNode = writeQNodes;
    958 		tmpqNode = qNodes;
    959 		for (i = 0; i < numParityNodes; i++) {
    960 			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
    961 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    962 				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    963 				    "Wnq", allocList);
    964 			RF_ASSERT(pda != NULL);
    965 			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
    966 								 * filled in by xor node */
    967 			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
    968 										 * parity write
    969 										 * operation */
    970 			tmpwriteQNode->params[2].v = parityStripeID;
    971 			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    972 			    which_ru);
    973 			pda = pda->next;
    974 			tmpwriteQNode = tmpwriteQNode->list_next;
    975 			tmpqNode = tmpqNode->list_next;
    976 		}
    977 	}
    978 #endif
    979 	/*
    980          * Step 4. connect the nodes.
    981          */
    982 
    983 	/* connect header to block node */
    984 	dag_h->succedents[0] = blockNode;
    985 
    986 	/* connect block node to read old data nodes */
    987 	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
    988 	tmpreadDataNode = readDataNodes;
    989 	for (i = 0; i < numDataNodes; i++) {
    990 		blockNode->succedents[i] = tmpreadDataNode;
    991 		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
    992 		tmpreadDataNode->antecedents[0] = blockNode;
    993 		tmpreadDataNode->antType[0] = rf_control;
    994 		tmpreadDataNode = tmpreadDataNode->list_next;
    995 	}
    996 
    997 	/* connect block node to read old parity nodes */
    998 	tmpreadParityNode = readParityNodes;
    999 	for (i = 0; i < numParityNodes; i++) {
   1000 		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
   1001 		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
   1002 		tmpreadParityNode->antecedents[0] = blockNode;
   1003 		tmpreadParityNode->antType[0] = rf_control;
   1004 		tmpreadParityNode = tmpreadParityNode->list_next;
   1005 	}
   1006 
   1007 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1008 	/* connect block node to read old Q nodes */
   1009 	if (nfaults == 2) {
   1010 		tmpreadQNode = readQNodes;
   1011 		for (i = 0; i < numParityNodes; i++) {
   1012 			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
   1013 			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
   1014 			tmpreadQNode->antecedents[0] = blockNode;
   1015 			tmpreadQNode->antType[0] = rf_control;
   1016 			tmpreadQNode = tmpreadQNode->list_next;
   1017 		}
   1018 	}
   1019 #endif
   1020 	/* connect read old data nodes to xor nodes */
   1021 	tmpreadDataNode = readDataNodes;
   1022 	for (i = 0; i < numDataNodes; i++) {
   1023 		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
   1024 		tmpxorNode = xorNodes;
   1025 		for (j = 0; j < numParityNodes; j++) {
   1026 			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
   1027 			tmpreadDataNode->succedents[j] = tmpxorNode;
   1028 			tmpxorNode->antecedents[i] = tmpreadDataNode;
   1029 			tmpxorNode->antType[i] = rf_trueData;
   1030 			tmpxorNode = tmpxorNode->list_next;
   1031 		}
   1032 		tmpreadDataNode = tmpreadDataNode->list_next;
   1033 	}
   1034 
   1035 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1036 	/* connect read old data nodes to q nodes */
   1037 	if (nfaults == 2) {
   1038 		tmpreadDataNode = readDataNodes;
   1039 		for (i = 0; i < numDataNodes; i++) {
   1040 			tmpqNode = qNodes;
   1041 			for (j = 0; j < numParityNodes; j++) {
   1042 				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
   1043 				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
   1044 				tmpqNode->antecedents[i] = tmpreadDataNode;
   1045 				tmpqNode->antType[i] = rf_trueData;
   1046 				tmpqNode = tmpqNode->list_next;
   1047 			}
   1048 			tmpreadDataNode = tmpreadDataNode->list_next;
   1049 		}
   1050 	}
   1051 #endif
   1052 	/* connect read old parity nodes to xor nodes */
   1053 	tmpreadParityNode = readParityNodes;
   1054 	for (i = 0; i < numParityNodes; i++) {
   1055 		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
   1056 		tmpxorNode = xorNodes;
   1057 		for (j = 0; j < numParityNodes; j++) {
   1058 			tmpreadParityNode->succedents[j] = tmpxorNode;
   1059 			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
   1060 			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
   1061 			tmpxorNode = tmpxorNode->list_next;
   1062 		}
   1063 		tmpreadParityNode = tmpreadParityNode->list_next;
   1064 	}
   1065 
   1066 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1067 	/* connect read old q nodes to q nodes */
   1068 	if (nfaults == 2) {
   1069 		tmpreadParityNode = readParityNodes;
   1070 		tmpreadQNode = readQNodes;
   1071 		for (i = 0; i < numParityNodes; i++) {
   1072 			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
   1073 			tmpqNode = qNodes;
   1074 			for (j = 0; j < numParityNodes; j++) {
   1075 				tmpreadQNode->succedents[j] = tmpqNode;
   1076 				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
   1077 				tmpqNode->antType[numDataNodes + i] = rf_trueData;
   1078 				tmpqNode = tmpqNode->list_next;
   1079 			}
   1080 			tmpreadParityNode = tmpreadParityNode->list_next;
   1081 			tmpreadQNode = tmpreadQNode->list_next;
   1082 		}
   1083 	}
   1084 #endif
   1085 	/* connect xor nodes to commit node */
   1086 	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
   1087 	tmpxorNode = xorNodes;
   1088 	for (i = 0; i < numParityNodes; i++) {
   1089 		RF_ASSERT(tmpxorNode->numSuccedents == 1);
   1090 		tmpxorNode->succedents[0] = commitNode;
   1091 		commitNode->antecedents[i] = tmpxorNode;
   1092 		commitNode->antType[i] = rf_control;
   1093 		tmpxorNode = tmpxorNode->list_next;
   1094 	}
   1095 
   1096 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1097 	/* connect q nodes to commit node */
   1098 	if (nfaults == 2) {
   1099 		tmpqNode = qNodes;
   1100 		for (i = 0; i < numParityNodes; i++) {
   1101 			RF_ASSERT(tmpqNode->numSuccedents == 1);
   1102 			tmpqNode->succedents[0] = commitNode;
   1103 			commitNode->antecedents[i + numParityNodes] = tmpqNode;
   1104 			commitNode->antType[i + numParityNodes] = rf_control;
   1105 			tmpqNode = tmpqNode->list_next;
   1106 		}
   1107 	}
   1108 #endif
   1109 	/* connect commit node to write nodes */
   1110 	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
   1111 	tmpwriteDataNode = writeDataNodes;
   1112 	for (i = 0; i < numDataNodes; i++) {
   1113 		RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
   1114 		commitNode->succedents[i] = tmpwriteDataNode;
   1115 		tmpwriteDataNode->antecedents[0] = commitNode;
   1116 		tmpwriteDataNode->antType[0] = rf_trueData;
   1117 		tmpwriteDataNode = tmpwriteDataNode->list_next;
   1118 	}
   1119 	tmpwriteParityNode = writeParityNodes;
   1120 	for (i = 0; i < numParityNodes; i++) {
   1121 		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
   1122 		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
   1123 		tmpwriteParityNode->antecedents[0] = commitNode;
   1124 		tmpwriteParityNode->antType[0] = rf_trueData;
   1125 		tmpwriteParityNode = tmpwriteParityNode->list_next;
   1126 	}
   1127 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1128 	if (nfaults == 2) {
   1129 		tmpwriteQNode = writeQNodes;
   1130 		for (i = 0; i < numParityNodes; i++) {
   1131 			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
   1132 			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
   1133 			tmpwriteQNode->antecedents[0] = commitNode;
   1134 			tmpwriteQNode->antType[0] = rf_trueData;
   1135 			tmpwriteQNode = tmpwriteQNode->list_next;
   1136 		}
   1137 	}
   1138 #endif
   1139 	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
   1140 	RF_ASSERT(termNode->numSuccedents == 0);
   1141 	tmpwriteDataNode = writeDataNodes;
   1142 	for (i = 0; i < numDataNodes; i++) {
   1143 		/* connect write new data nodes to term node */
   1144 		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
   1145 		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
   1146 		tmpwriteDataNode->succedents[0] = termNode;
   1147 		termNode->antecedents[i] = tmpwriteDataNode;
   1148 		termNode->antType[i] = rf_control;
   1149 		tmpwriteDataNode = tmpwriteDataNode->list_next;
   1150 	}
   1151 
   1152 	tmpwriteParityNode = writeParityNodes;
   1153 	for (i = 0; i < numParityNodes; i++) {
   1154 		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
   1155 		tmpwriteParityNode->succedents[0] = termNode;
   1156 		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
   1157 		termNode->antType[numDataNodes + i] = rf_control;
   1158 		tmpwriteParityNode = tmpwriteParityNode->list_next;
   1159 	}
   1160 
   1161 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1162 	if (nfaults == 2) {
   1163 		tmpwriteQNode = writeQNodes;
   1164 		for (i = 0; i < numParityNodes; i++) {
   1165 			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
   1166 			tmpwriteQNode->succedents[0] = termNode;
   1167 			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
   1168 			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
   1169 			tmpwriteQNode = tmpwriteQNode->list_next;
   1170 		}
   1171 	}
   1172 #endif
   1173 }
   1174 
   1175 
   1176 /******************************************************************************
   1177  * create a write graph (fault-free or degraded) for RAID level 1
   1178  *
   1179  * Hdr -> Commit -> Wpd -> Nil -> Trm
   1180  *               -> Wsd ->
   1181  *
   1182  * The "Wpd" node writes data to the primary copy in the mirror pair
   1183  * The "Wsd" node writes data to the secondary copy in the mirror pair
   1184  *
   1185  * Parameters:  raidPtr   - description of the physical array
   1186  *              asmap     - logical & physical addresses for this access
   1187  *              bp        - buffer ptr (holds write data)
   1188  *              flags     - general flags (e.g. disk locking)
   1189  *              allocList - list of memory allocated in DAG creation
   1190  *****************************************************************************/
   1191 
   1192 void
   1193 rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
   1194 			 RF_DagHeader_t *dag_h, void *bp,
   1195 			 RF_RaidAccessFlags_t flags,
   1196 			 RF_AllocListElem_t *allocList)
   1197 {
   1198 	RF_DagNode_t *unblockNode, *termNode, *commitNode;
   1199 	RF_DagNode_t *wndNode, *wmirNode;
   1200 	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
   1201 	int     nWndNodes, nWmirNodes, i;
   1202 	RF_ReconUnitNum_t which_ru;
   1203 	RF_PhysDiskAddr_t *pda, *pdaP;
   1204 	RF_StripeNum_t parityStripeID;
   1205 
   1206 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
   1207 	    asmap->raidAddress, &which_ru);
   1208 #if RF_DEBUG_DAG
   1209 	if (rf_dagDebug) {
   1210 		printf("[Creating RAID level 1 write DAG]\n");
   1211 	}
   1212 #endif
   1213 	dag_h->creator = "RaidOneWriteDAG";
   1214 
   1215 	/* 2 implies access not SU aligned */
   1216 	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
   1217 	nWndNodes = (asmap->physInfo->next) ? 2 : 1;
   1218 
   1219 	/* alloc the Wnd nodes and the Wmir node */
   1220 	if (asmap->numDataFailed == 1)
   1221 		nWndNodes--;
   1222 	if (asmap->numParityFailed == 1)
   1223 		nWmirNodes--;
   1224 
   1225 	/* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
   1226 	 * + terminator) */
   1227 	for (i = 0; i < nWndNodes; i++) {
   1228 		tmpNode = rf_AllocDAGNode();
   1229 		tmpNode->list_next = dag_h->nodes;
   1230 		dag_h->nodes = tmpNode;
   1231 	}
   1232 	wndNode = dag_h->nodes;
   1233 
   1234 	for (i = 0; i < nWmirNodes; i++) {
   1235 		tmpNode = rf_AllocDAGNode();
   1236 		tmpNode->list_next = dag_h->nodes;
   1237 		dag_h->nodes = tmpNode;
   1238 	}
   1239 	wmirNode = dag_h->nodes;
   1240 
   1241 	commitNode = rf_AllocDAGNode();
   1242 	commitNode->list_next = dag_h->nodes;
   1243 	dag_h->nodes = commitNode;
   1244 
   1245 	unblockNode = rf_AllocDAGNode();
   1246 	unblockNode->list_next = dag_h->nodes;
   1247 	dag_h->nodes = unblockNode;
   1248 
   1249 	termNode = rf_AllocDAGNode();
   1250 	termNode->list_next = dag_h->nodes;
   1251 	dag_h->nodes = termNode;
   1252 
   1253 	/* this dag can commit immediately */
   1254 	dag_h->numCommitNodes = 1;
   1255 	dag_h->numCommits = 0;
   1256 	dag_h->numSuccedents = 1;
   1257 
   1258 	/* initialize the commit, unblock, and term nodes */
   1259 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
   1260 		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
   1261 		    0, 0, 0, dag_h, "Cmt", allocList);
   1262 	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
   1263 		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
   1264 		    0, 0, dag_h, "Nil", allocList);
   1265 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
   1266 		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
   1267 		    dag_h, "Trm", allocList);
   1268 
   1269 	/* initialize the wnd nodes */
   1270 	if (nWndNodes > 0) {
   1271 		pda = asmap->physInfo;
   1272 		tmpwndNode = wndNode;
   1273 		for (i = 0; i < nWndNodes; i++) {
   1274 			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
   1275 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
   1276 				    rf_GenericWakeupFunc, 1, 1, 4, 0,
   1277 				    dag_h, "Wpd", allocList);
   1278 			RF_ASSERT(pda != NULL);
   1279 			tmpwndNode->params[0].p = pda;
   1280 			tmpwndNode->params[1].p = pda->bufPtr;
   1281 			tmpwndNode->params[2].v = parityStripeID;
   1282 			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
   1283 			pda = pda->next;
   1284 			tmpwndNode = tmpwndNode->list_next;
   1285 		}
   1286 		RF_ASSERT(pda == NULL);
   1287 	}
   1288 	/* initialize the mirror nodes */
   1289 	if (nWmirNodes > 0) {
   1290 		pda = asmap->physInfo;
   1291 		pdaP = asmap->parityInfo;
   1292 		tmpwmirNode = wmirNode;
   1293 		for (i = 0; i < nWmirNodes; i++) {
   1294 			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
   1295 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
   1296 				    rf_GenericWakeupFunc, 1, 1, 4, 0,
   1297 				    dag_h, "Wsd", allocList);
   1298 			RF_ASSERT(pda != NULL);
   1299 			tmpwmirNode->params[0].p = pdaP;
   1300 			tmpwmirNode->params[1].p = pda->bufPtr;
   1301 			tmpwmirNode->params[2].v = parityStripeID;
   1302 			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
   1303 			pda = pda->next;
   1304 			pdaP = pdaP->next;
   1305 			tmpwmirNode = tmpwmirNode->list_next;
   1306 		}
   1307 		RF_ASSERT(pda == NULL);
   1308 		RF_ASSERT(pdaP == NULL);
   1309 	}
   1310 	/* link the header node to the commit node */
   1311 	RF_ASSERT(dag_h->numSuccedents == 1);
   1312 	RF_ASSERT(commitNode->numAntecedents == 0);
   1313 	dag_h->succedents[0] = commitNode;
   1314 
   1315 	/* link the commit node to the write nodes */
   1316 	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
   1317 	tmpwndNode = wndNode;
   1318 	for (i = 0; i < nWndNodes; i++) {
   1319 		RF_ASSERT(tmpwndNode->numAntecedents == 1);
   1320 		commitNode->succedents[i] = tmpwndNode;
   1321 		tmpwndNode->antecedents[0] = commitNode;
   1322 		tmpwndNode->antType[0] = rf_control;
   1323 		tmpwndNode = tmpwndNode->list_next;
   1324 	}
   1325 	tmpwmirNode = wmirNode;
   1326 	for (i = 0; i < nWmirNodes; i++) {
   1327 		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
   1328 		commitNode->succedents[i + nWndNodes] = tmpwmirNode;
   1329 		tmpwmirNode->antecedents[0] = commitNode;
   1330 		tmpwmirNode->antType[0] = rf_control;
   1331 		tmpwmirNode = tmpwmirNode->list_next;
   1332 	}
   1333 
   1334 	/* link the write nodes to the unblock node */
   1335 	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
   1336 	tmpwndNode = wndNode;
   1337 	for (i = 0; i < nWndNodes; i++) {
   1338 		RF_ASSERT(tmpwndNode->numSuccedents == 1);
   1339 		tmpwndNode->succedents[0] = unblockNode;
   1340 		unblockNode->antecedents[i] = tmpwndNode;
   1341 		unblockNode->antType[i] = rf_control;
   1342 		tmpwndNode = tmpwndNode->list_next;
   1343 	}
   1344 	tmpwmirNode = wmirNode;
   1345 	for (i = 0; i < nWmirNodes; i++) {
   1346 		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
   1347 		tmpwmirNode->succedents[0] = unblockNode;
   1348 		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
   1349 		unblockNode->antType[i + nWndNodes] = rf_control;
   1350 		tmpwmirNode = tmpwmirNode->list_next;
   1351 	}
   1352 
   1353 	/* link the unblock node to the term node */
   1354 	RF_ASSERT(unblockNode->numSuccedents == 1);
   1355 	RF_ASSERT(termNode->numAntecedents == 1);
   1356 	RF_ASSERT(termNode->numSuccedents == 0);
   1357 	unblockNode->succedents[0] = termNode;
   1358 	termNode->antecedents[0] = unblockNode;
   1359 	termNode->antType[0] = rf_control;
   1360 }
   1361