Home | History | Annotate | Line # | Download | only in raidframe
rf_dagffwr.c revision 1.33.88.1
      1  1.33.88.1      yamt /*	$NetBSD: rf_dagffwr.c,v 1.33.88.1 2014/05/22 11:40:35 yamt Exp $	*/
      2        1.1     oster /*
      3        1.1     oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4        1.1     oster  * All rights reserved.
      5        1.1     oster  *
      6        1.1     oster  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
      7        1.1     oster  *
      8        1.1     oster  * Permission to use, copy, modify and distribute this software and
      9        1.1     oster  * its documentation is hereby granted, provided that both the copyright
     10        1.1     oster  * notice and this permission notice appear in all copies of the
     11        1.1     oster  * software, derivative works or modified versions, and any portions
     12        1.1     oster  * thereof, and that both notices appear in supporting documentation.
     13        1.1     oster  *
     14        1.1     oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15        1.1     oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16        1.1     oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17        1.1     oster  *
     18        1.1     oster  * Carnegie Mellon requests users of this software to return to
     19        1.1     oster  *
     20        1.1     oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21        1.1     oster  *  School of Computer Science
     22        1.1     oster  *  Carnegie Mellon University
     23        1.1     oster  *  Pittsburgh PA 15213-3890
     24        1.1     oster  *
     25        1.1     oster  * any improvements or extensions that they make and grant Carnegie the
     26        1.1     oster  * rights to redistribute these changes.
     27        1.1     oster  */
     28        1.1     oster 
     29        1.1     oster /*
     30        1.1     oster  * rf_dagffwr.c
     31        1.1     oster  *
     32        1.1     oster  * code for creating fault-free DAGs
     33        1.1     oster  *
     34        1.1     oster  */
     35        1.7     lukem 
     36        1.7     lukem #include <sys/cdefs.h>
     37  1.33.88.1      yamt __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.33.88.1 2014/05/22 11:40:35 yamt Exp $");
     38        1.1     oster 
     39        1.6     oster #include <dev/raidframe/raidframevar.h>
     40        1.6     oster 
     41        1.1     oster #include "rf_raid.h"
     42        1.1     oster #include "rf_dag.h"
     43        1.1     oster #include "rf_dagutils.h"
     44        1.1     oster #include "rf_dagfuncs.h"
     45        1.1     oster #include "rf_debugMem.h"
     46        1.1     oster #include "rf_dagffrd.h"
     47        1.1     oster #include "rf_general.h"
     48        1.1     oster #include "rf_dagffwr.h"
     49       1.23     oster #include "rf_map.h"
     50        1.1     oster 
     51        1.1     oster /******************************************************************************
     52        1.1     oster  *
     53        1.1     oster  * General comments on DAG creation:
     54        1.3     oster  *
     55        1.1     oster  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
     56        1.1     oster  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
     57        1.1     oster  * is reached, the execution engine will halt forward execution and work
     58        1.1     oster  * backward through the graph, executing the undo functions.  Assuming that
     59        1.1     oster  * each node in the graph prior to the Cmt node is undoable and atomic - or -
     60        1.1     oster  * does not make changes to permanent state, the graph will fail atomically.
     61        1.1     oster  * If an error occurs after the Cmt node executes, the engine will roll-forward
     62        1.1     oster  * through the graph, blindly executing nodes until it reaches the end.
     63        1.1     oster  * If a graph reaches the end, it is assumed to have completed successfully.
     64        1.1     oster  *
     65        1.1     oster  * A graph has only 1 Cmt node.
     66        1.1     oster  *
     67        1.1     oster  */
     68        1.1     oster 
     69        1.1     oster 
     70        1.1     oster /******************************************************************************
     71        1.1     oster  *
     72        1.1     oster  * The following wrappers map the standard DAG creation interface to the
     73        1.1     oster  * DAG creation routines.  Additionally, these wrappers enable experimentation
     74        1.1     oster  * with new DAG structures by providing an extra level of indirection, allowing
     75        1.1     oster  * the DAG creation routines to be replaced at this single point.
     76        1.1     oster  */
     77        1.1     oster 
     78        1.1     oster 
     79       1.29     perry void
     80       1.13     oster rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
     81       1.13     oster 			      RF_DagHeader_t *dag_h, void *bp,
     82       1.13     oster 			      RF_RaidAccessFlags_t flags,
     83       1.13     oster 			      RF_AllocListElem_t *allocList,
     84       1.33  christos 			      RF_IoType_t type)
     85        1.1     oster {
     86        1.3     oster 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
     87       1.14     oster 				 RF_IO_TYPE_WRITE);
     88        1.1     oster }
     89        1.1     oster 
     90       1.29     perry void
     91       1.13     oster rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
     92       1.13     oster 		       RF_DagHeader_t *dag_h, void *bp,
     93       1.13     oster 		       RF_RaidAccessFlags_t flags,
     94       1.13     oster 		       RF_AllocListElem_t *allocList,
     95       1.33  christos 		       RF_IoType_t type)
     96        1.1     oster {
     97        1.3     oster 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
     98       1.14     oster 				 RF_IO_TYPE_WRITE);
     99        1.1     oster }
    100        1.1     oster 
    101       1.29     perry void
    102       1.13     oster rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    103       1.13     oster 		       RF_DagHeader_t *dag_h, void *bp,
    104       1.13     oster 		       RF_RaidAccessFlags_t flags,
    105       1.13     oster 		       RF_AllocListElem_t *allocList)
    106        1.1     oster {
    107        1.3     oster 	/* "normal" rollaway */
    108       1.29     perry 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
    109       1.14     oster 				     allocList, &rf_xorFuncs, NULL);
    110        1.1     oster }
    111        1.1     oster 
    112       1.29     perry void
    113       1.13     oster rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    114       1.13     oster 		       RF_DagHeader_t *dag_h, void *bp,
    115       1.13     oster 		       RF_RaidAccessFlags_t flags,
    116       1.13     oster 		       RF_AllocListElem_t *allocList)
    117        1.1     oster {
    118        1.3     oster 	/* "normal" rollaway */
    119       1.29     perry 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
    120       1.14     oster 				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
    121        1.1     oster }
    122        1.1     oster 
    123        1.1     oster 
    124        1.1     oster /******************************************************************************
    125        1.1     oster  *
    126        1.1     oster  * DAG creation code begins here
    127        1.1     oster  */
    128        1.1     oster 
    129        1.1     oster 
    130        1.1     oster /******************************************************************************
    131        1.1     oster  *
    132        1.1     oster  * creates a DAG to perform a large-write operation:
    133        1.1     oster  *
    134        1.1     oster  *           / Rod \           / Wnd \
    135        1.1     oster  * H -- block- Rod - Xor - Cmt - Wnd --- T
    136        1.1     oster  *           \ Rod /          \  Wnp /
    137        1.1     oster  *                             \[Wnq]/
    138        1.1     oster  *
    139        1.1     oster  * The XOR node also does the Q calculation in the P+Q architecture.
    140        1.1     oster  * All nodes before the commit node (Cmt) are assumed to be atomic and
    141        1.1     oster  * undoable - or - they make no changes to permanent state.
    142        1.1     oster  *
    143        1.1     oster  * Rod = read old data
    144        1.1     oster  * Cmt = commit node
    145        1.1     oster  * Wnp = write new parity
    146        1.1     oster  * Wnd = write new data
    147        1.1     oster  * Wnq = write new "q"
    148        1.1     oster  * [] denotes optional segments in the graph
    149        1.1     oster  *
    150        1.1     oster  * Parameters:  raidPtr   - description of the physical array
    151        1.1     oster  *              asmap     - logical & physical addresses for this access
    152        1.1     oster  *              bp        - buffer ptr (holds write data)
    153        1.3     oster  *              flags     - general flags (e.g. disk locking)
    154        1.1     oster  *              allocList - list of memory allocated in DAG creation
    155        1.1     oster  *              nfaults   - number of faults array can tolerate
    156        1.1     oster  *                          (equal to # redundancy units in stripe)
    157        1.1     oster  *              redfuncs  - list of redundancy generating functions
    158        1.1     oster  *
    159        1.1     oster  *****************************************************************************/
    160        1.1     oster 
    161       1.29     perry void
    162       1.13     oster rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    163       1.33  christos 			     RF_DagHeader_t *dag_h, void *bp,
    164       1.33  christos 			     RF_RaidAccessFlags_t flags,
    165       1.13     oster 			     RF_AllocListElem_t *allocList,
    166       1.13     oster 			     int nfaults, int (*redFunc) (RF_DagNode_t *),
    167       1.13     oster 			     int allowBufferRecycle)
    168        1.1     oster {
    169       1.22     oster 	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
    170  1.33.88.1      yamt 	RF_DagNode_t *blockNode, *commitNode, *termNode;
    171  1.33.88.1      yamt #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    172  1.33.88.1      yamt 	RF_DagNode_t *wnqNode;
    173  1.33.88.1      yamt #endif
    174        1.3     oster 	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
    175        1.3     oster 	RF_AccessStripeMapHeader_t *new_asm_h[2];
    176        1.3     oster 	RF_StripeNum_t parityStripeID;
    177        1.3     oster 	char   *sosBuffer, *eosBuffer;
    178        1.3     oster 	RF_ReconUnitNum_t which_ru;
    179        1.3     oster 	RF_RaidLayout_t *layoutPtr;
    180        1.3     oster 	RF_PhysDiskAddr_t *pda;
    181        1.3     oster 
    182        1.3     oster 	layoutPtr = &(raidPtr->Layout);
    183       1.29     perry 	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
    184       1.14     oster 							asmap->raidAddress,
    185       1.14     oster 							&which_ru);
    186        1.3     oster 
    187       1.19     oster #if RF_DEBUG_DAG
    188        1.3     oster 	if (rf_dagDebug) {
    189        1.3     oster 		printf("[Creating large-write DAG]\n");
    190        1.3     oster 	}
    191       1.19     oster #endif
    192        1.3     oster 	dag_h->creator = "LargeWriteDAG";
    193        1.3     oster 
    194        1.3     oster 	dag_h->numCommitNodes = 1;
    195        1.3     oster 	dag_h->numCommits = 0;
    196        1.3     oster 	dag_h->numSuccedents = 1;
    197        1.3     oster 
    198        1.3     oster 	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
    199        1.3     oster 	nWndNodes = asmap->numStripeUnitsAccessed;
    200       1.22     oster 
    201       1.22     oster 	for (i = 0; i < nWndNodes; i++) {
    202       1.22     oster 		tmpNode = rf_AllocDAGNode();
    203       1.22     oster 		tmpNode->list_next = dag_h->nodes;
    204       1.22     oster 		dag_h->nodes = tmpNode;
    205       1.22     oster 	}
    206       1.22     oster 	wndNodes = dag_h->nodes;
    207       1.22     oster 
    208       1.22     oster 	xorNode = rf_AllocDAGNode();
    209       1.22     oster 	xorNode->list_next = dag_h->nodes;
    210       1.22     oster 	dag_h->nodes = xorNode;
    211       1.22     oster 
    212       1.22     oster 	wnpNode = rf_AllocDAGNode();
    213       1.22     oster 	wnpNode->list_next = dag_h->nodes;
    214       1.22     oster 	dag_h->nodes = wnpNode;
    215       1.22     oster 
    216       1.22     oster 	blockNode = rf_AllocDAGNode();
    217       1.22     oster 	blockNode->list_next = dag_h->nodes;
    218       1.22     oster 	dag_h->nodes = blockNode;
    219       1.22     oster 
    220       1.22     oster 	commitNode = rf_AllocDAGNode();
    221       1.22     oster 	commitNode->list_next = dag_h->nodes;
    222       1.22     oster 	dag_h->nodes = commitNode;
    223       1.22     oster 
    224       1.22     oster 	termNode = rf_AllocDAGNode();
    225       1.22     oster 	termNode->list_next = dag_h->nodes;
    226       1.22     oster 	dag_h->nodes = termNode;
    227       1.22     oster 
    228       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    229        1.3     oster 	if (nfaults == 2) {
    230       1.22     oster 		wnqNode = rf_AllocDAGNode();
    231        1.3     oster 	} else {
    232        1.3     oster 		wnqNode = NULL;
    233        1.3     oster 	}
    234       1.20     oster #endif
    235       1.29     perry 	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
    236       1.29     perry 					new_asm_h, &nRodNodes, &sosBuffer,
    237       1.14     oster 					&eosBuffer, allocList);
    238        1.3     oster 	if (nRodNodes > 0) {
    239       1.22     oster 		for (i = 0; i < nRodNodes; i++) {
    240       1.22     oster 			tmpNode = rf_AllocDAGNode();
    241       1.22     oster 			tmpNode->list_next = dag_h->nodes;
    242       1.22     oster 			dag_h->nodes = tmpNode;
    243       1.22     oster 		}
    244       1.22     oster 		rodNodes = dag_h->nodes;
    245        1.3     oster 	} else {
    246        1.3     oster 		rodNodes = NULL;
    247        1.3     oster 	}
    248        1.3     oster 
    249        1.3     oster 	/* begin node initialization */
    250        1.3     oster 	if (nRodNodes > 0) {
    251       1.29     perry 		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    252       1.29     perry 			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
    253       1.14     oster 			    dag_h, "Nil", allocList);
    254        1.3     oster 	} else {
    255       1.29     perry 		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    256       1.29     perry 			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
    257       1.14     oster 			    dag_h, "Nil", allocList);
    258        1.3     oster 	}
    259        1.3     oster 
    260       1.29     perry 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
    261       1.29     perry 		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
    262       1.14     oster 		    dag_h, "Cmt", allocList);
    263       1.29     perry 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
    264       1.29     perry 		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
    265       1.14     oster 		    dag_h, "Trm", allocList);
    266        1.3     oster 
    267        1.3     oster 	/* initialize the Rod nodes */
    268       1.22     oster 	tmpNode = rodNodes;
    269        1.3     oster 	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
    270        1.3     oster 		if (new_asm_h[asmNum]) {
    271        1.3     oster 			pda = new_asm_h[asmNum]->stripeMap->physInfo;
    272        1.3     oster 			while (pda) {
    273       1.29     perry 				rf_InitNode(tmpNode, rf_wait,
    274       1.14     oster 					    RF_FALSE, rf_DiskReadFunc,
    275       1.29     perry 					    rf_DiskReadUndoFunc,
    276       1.29     perry 					    rf_GenericWakeupFunc,
    277       1.14     oster 					    1, 1, 4, 0, dag_h,
    278       1.14     oster 					    "Rod", allocList);
    279       1.22     oster 				tmpNode->params[0].p = pda;
    280       1.22     oster 				tmpNode->params[1].p = pda->bufPtr;
    281       1.22     oster 				tmpNode->params[2].v = parityStripeID;
    282       1.22     oster 				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    283       1.17     oster 				    which_ru);
    284        1.3     oster 				nodeNum++;
    285        1.3     oster 				pda = pda->next;
    286       1.22     oster 				tmpNode = tmpNode->list_next;
    287        1.3     oster 			}
    288        1.3     oster 		}
    289        1.3     oster 	}
    290        1.3     oster 	RF_ASSERT(nodeNum == nRodNodes);
    291        1.3     oster 
    292        1.3     oster 	/* initialize the wnd nodes */
    293        1.3     oster 	pda = asmap->physInfo;
    294       1.22     oster 	tmpNode = wndNodes;
    295        1.3     oster 	for (i = 0; i < nWndNodes; i++) {
    296       1.29     perry 		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
    297       1.14     oster 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    298       1.29     perry 			    rf_GenericWakeupFunc, 1, 1, 4, 0,
    299       1.14     oster 			    dag_h, "Wnd", allocList);
    300        1.3     oster 		RF_ASSERT(pda != NULL);
    301       1.22     oster 		tmpNode->params[0].p = pda;
    302       1.22     oster 		tmpNode->params[1].p = pda->bufPtr;
    303       1.22     oster 		tmpNode->params[2].v = parityStripeID;
    304       1.22     oster 		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    305        1.3     oster 		pda = pda->next;
    306       1.22     oster 		tmpNode = tmpNode->list_next;
    307        1.3     oster 	}
    308        1.3     oster 
    309        1.3     oster 	/* initialize the redundancy node */
    310        1.3     oster 	if (nRodNodes > 0) {
    311       1.29     perry 		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
    312       1.14     oster 			    rf_NullNodeUndoFunc, NULL, 1,
    313       1.29     perry 			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
    314       1.14     oster 			    nfaults, dag_h, "Xr ", allocList);
    315        1.3     oster 	} else {
    316       1.29     perry 		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
    317       1.14     oster 			    rf_NullNodeUndoFunc, NULL, 1,
    318       1.29     perry 			    1, 2 * (nWndNodes + nRodNodes) + 1,
    319       1.14     oster 			    nfaults, dag_h, "Xr ", allocList);
    320        1.3     oster 	}
    321        1.3     oster 	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
    322       1.22     oster 	tmpNode = wndNodes;
    323        1.3     oster 	for (i = 0; i < nWndNodes; i++) {
    324       1.14     oster 		/* pda */
    325       1.22     oster 		xorNode->params[2 * i + 0] = tmpNode->params[0];
    326       1.29     perry 		/* buf ptr */
    327       1.22     oster 		xorNode->params[2 * i + 1] = tmpNode->params[1];
    328       1.22     oster 		tmpNode = tmpNode->list_next;
    329        1.3     oster 	}
    330       1.22     oster 	tmpNode = rodNodes;
    331        1.3     oster 	for (i = 0; i < nRodNodes; i++) {
    332       1.14     oster 		/* pda */
    333       1.22     oster 		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
    334       1.14     oster 		/* buf ptr */
    335       1.22     oster 		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
    336       1.22     oster 		tmpNode = tmpNode->list_next;
    337        1.3     oster 	}
    338        1.3     oster 	/* xor node needs to get at RAID information */
    339        1.3     oster 	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
    340        1.3     oster 
    341        1.3     oster 	/*
    342       1.14     oster          * Look for an Rod node that reads a complete SU. If none,
    343       1.14     oster          * alloc a buffer to receive the parity info. Note that we
    344       1.14     oster          * can't use a new data buffer because it will not have gotten
    345       1.14     oster          * written when the xor occurs.  */
    346        1.3     oster 	if (allowBufferRecycle) {
    347       1.22     oster 		tmpNode = rodNodes;
    348        1.3     oster 		for (i = 0; i < nRodNodes; i++) {
    349       1.22     oster 			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
    350        1.3     oster 				break;
    351       1.22     oster 			tmpNode = tmpNode->list_next;
    352        1.3     oster 		}
    353        1.3     oster 	}
    354        1.3     oster 	if ((!allowBufferRecycle) || (i == nRodNodes)) {
    355       1.27     oster 		xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
    356        1.3     oster 	} else {
    357       1.22     oster 		/* this works because the only way we get here is if
    358       1.22     oster 		   allowBufferRecycle is true and we went through the
    359       1.22     oster 		   above for loop, and exited via the break before
    360       1.22     oster 		   i==nRodNodes was true.  That means tmpNode will
    361       1.22     oster 		   still point to a valid node -- the one we want for
    362       1.22     oster 		   here! */
    363       1.22     oster 		xorNode->results[0] = tmpNode->params[1].p;
    364        1.3     oster 	}
    365        1.3     oster 
    366        1.3     oster 	/* initialize the Wnp node */
    367       1.29     perry 	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
    368       1.29     perry 		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
    369       1.14     oster 		    dag_h, "Wnp", allocList);
    370        1.3     oster 	wnpNode->params[0].p = asmap->parityInfo;
    371        1.3     oster 	wnpNode->params[1].p = xorNode->results[0];
    372        1.3     oster 	wnpNode->params[2].v = parityStripeID;
    373       1.17     oster 	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    374        1.3     oster 	/* parityInfo must describe entire parity unit */
    375        1.3     oster 	RF_ASSERT(asmap->parityInfo->next == NULL);
    376        1.3     oster 
    377       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    378        1.3     oster 	if (nfaults == 2) {
    379        1.3     oster 		/*
    380        1.3     oster 	         * We never try to recycle a buffer for the Q calcuation
    381        1.3     oster 	         * in addition to the parity. This would cause two buffers
    382        1.3     oster 	         * to get smashed during the P and Q calculation, guaranteeing
    383        1.3     oster 	         * one would be wrong.
    384        1.3     oster 	         */
    385       1.12     oster 		RF_MallocAndAdd(xorNode->results[1],
    386       1.12     oster 				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
    387       1.12     oster 				(void *), allocList);
    388       1.29     perry 		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
    389       1.29     perry 			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
    390       1.14     oster 			    1, 1, 4, 0, dag_h, "Wnq", allocList);
    391        1.3     oster 		wnqNode->params[0].p = asmap->qInfo;
    392        1.3     oster 		wnqNode->params[1].p = xorNode->results[1];
    393        1.3     oster 		wnqNode->params[2].v = parityStripeID;
    394       1.17     oster 		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
    395        1.3     oster 		/* parityInfo must describe entire parity unit */
    396        1.3     oster 		RF_ASSERT(asmap->parityInfo->next == NULL);
    397        1.3     oster 	}
    398       1.20     oster #endif
    399        1.3     oster 	/*
    400        1.3     oster          * Connect nodes to form graph.
    401        1.3     oster          */
    402        1.3     oster 
    403        1.3     oster 	/* connect dag header to block node */
    404        1.3     oster 	RF_ASSERT(blockNode->numAntecedents == 0);
    405        1.3     oster 	dag_h->succedents[0] = blockNode;
    406        1.3     oster 
    407        1.3     oster 	if (nRodNodes > 0) {
    408        1.3     oster 		/* connect the block node to the Rod nodes */
    409        1.3     oster 		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
    410        1.3     oster 		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
    411       1.22     oster 		tmpNode = rodNodes;
    412        1.3     oster 		for (i = 0; i < nRodNodes; i++) {
    413       1.28     oster 			RF_ASSERT(tmpNode->numAntecedents == 1);
    414       1.22     oster 			blockNode->succedents[i] = tmpNode;
    415       1.22     oster 			tmpNode->antecedents[0] = blockNode;
    416       1.22     oster 			tmpNode->antType[0] = rf_control;
    417        1.3     oster 
    418        1.3     oster 			/* connect the Rod nodes to the Xor node */
    419       1.28     oster 			RF_ASSERT(tmpNode->numSuccedents == 1);
    420       1.22     oster 			tmpNode->succedents[0] = xorNode;
    421       1.22     oster 			xorNode->antecedents[i] = tmpNode;
    422        1.3     oster 			xorNode->antType[i] = rf_trueData;
    423       1.22     oster 			tmpNode = tmpNode->list_next;
    424        1.3     oster 		}
    425        1.3     oster 	} else {
    426        1.3     oster 		/* connect the block node to the Xor node */
    427        1.3     oster 		RF_ASSERT(blockNode->numSuccedents == 1);
    428        1.3     oster 		RF_ASSERT(xorNode->numAntecedents == 1);
    429        1.3     oster 		blockNode->succedents[0] = xorNode;
    430        1.3     oster 		xorNode->antecedents[0] = blockNode;
    431        1.3     oster 		xorNode->antType[0] = rf_control;
    432        1.3     oster 	}
    433        1.3     oster 
    434        1.3     oster 	/* connect the xor node to the commit node */
    435        1.3     oster 	RF_ASSERT(xorNode->numSuccedents == 1);
    436        1.3     oster 	RF_ASSERT(commitNode->numAntecedents == 1);
    437        1.3     oster 	xorNode->succedents[0] = commitNode;
    438        1.3     oster 	commitNode->antecedents[0] = xorNode;
    439        1.3     oster 	commitNode->antType[0] = rf_control;
    440        1.3     oster 
    441        1.3     oster 	/* connect the commit node to the write nodes */
    442        1.3     oster 	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
    443       1.22     oster 	tmpNode = wndNodes;
    444        1.3     oster 	for (i = 0; i < nWndNodes; i++) {
    445        1.3     oster 		RF_ASSERT(wndNodes->numAntecedents == 1);
    446       1.22     oster 		commitNode->succedents[i] = tmpNode;
    447       1.22     oster 		tmpNode->antecedents[0] = commitNode;
    448       1.22     oster 		tmpNode->antType[0] = rf_control;
    449       1.22     oster 		tmpNode = tmpNode->list_next;
    450        1.3     oster 	}
    451        1.3     oster 	RF_ASSERT(wnpNode->numAntecedents == 1);
    452        1.3     oster 	commitNode->succedents[nWndNodes] = wnpNode;
    453        1.3     oster 	wnpNode->antecedents[0] = commitNode;
    454        1.3     oster 	wnpNode->antType[0] = rf_trueData;
    455       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    456        1.3     oster 	if (nfaults == 2) {
    457        1.3     oster 		RF_ASSERT(wnqNode->numAntecedents == 1);
    458        1.3     oster 		commitNode->succedents[nWndNodes + 1] = wnqNode;
    459        1.3     oster 		wnqNode->antecedents[0] = commitNode;
    460        1.3     oster 		wnqNode->antType[0] = rf_trueData;
    461        1.3     oster 	}
    462       1.20     oster #endif
    463        1.3     oster 	/* connect the write nodes to the term node */
    464        1.3     oster 	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
    465        1.3     oster 	RF_ASSERT(termNode->numSuccedents == 0);
    466       1.22     oster 	tmpNode = wndNodes;
    467        1.3     oster 	for (i = 0; i < nWndNodes; i++) {
    468        1.3     oster 		RF_ASSERT(wndNodes->numSuccedents == 1);
    469       1.22     oster 		tmpNode->succedents[0] = termNode;
    470       1.22     oster 		termNode->antecedents[i] = tmpNode;
    471        1.3     oster 		termNode->antType[i] = rf_control;
    472       1.22     oster 		tmpNode = tmpNode->list_next;
    473        1.3     oster 	}
    474        1.3     oster 	RF_ASSERT(wnpNode->numSuccedents == 1);
    475        1.3     oster 	wnpNode->succedents[0] = termNode;
    476        1.3     oster 	termNode->antecedents[nWndNodes] = wnpNode;
    477        1.3     oster 	termNode->antType[nWndNodes] = rf_control;
    478       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    479        1.3     oster 	if (nfaults == 2) {
    480        1.3     oster 		RF_ASSERT(wnqNode->numSuccedents == 1);
    481        1.3     oster 		wnqNode->succedents[0] = termNode;
    482        1.3     oster 		termNode->antecedents[nWndNodes + 1] = wnqNode;
    483        1.3     oster 		termNode->antType[nWndNodes + 1] = rf_control;
    484        1.3     oster 	}
    485       1.20     oster #endif
    486        1.1     oster }
    487        1.1     oster /******************************************************************************
    488        1.1     oster  *
    489        1.1     oster  * creates a DAG to perform a small-write operation (either raid 5 or pq),
    490        1.1     oster  * which is as follows:
    491        1.1     oster  *
    492        1.1     oster  * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
    493        1.1     oster  *            \- Rod X      /     \----> Wnd [Und]-/
    494        1.1     oster  *           [\- Rod X     /       \---> Wnd [Und]-/]
    495        1.1     oster  *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
    496        1.1     oster  *
    497        1.1     oster  * Rop = read old parity
    498        1.1     oster  * Rod = read old data
    499        1.1     oster  * Roq = read old "q"
    500        1.1     oster  * Cmt = commit node
    501        1.1     oster  * Und = unlock data disk
    502        1.1     oster  * Unp = unlock parity disk
    503        1.1     oster  * Unq = unlock q disk
    504        1.1     oster  * Wnp = write new parity
    505        1.1     oster  * Wnd = write new data
    506        1.1     oster  * Wnq = write new "q"
    507        1.1     oster  * [ ] denotes optional segments in the graph
    508        1.1     oster  *
    509        1.1     oster  * Parameters:  raidPtr   - description of the physical array
    510        1.1     oster  *              asmap     - logical & physical addresses for this access
    511        1.1     oster  *              bp        - buffer ptr (holds write data)
    512        1.3     oster  *              flags     - general flags (e.g. disk locking)
    513        1.1     oster  *              allocList - list of memory allocated in DAG creation
    514        1.1     oster  *              pfuncs    - list of parity generating functions
    515        1.1     oster  *              qfuncs    - list of q generating functions
    516        1.1     oster  *
    517        1.1     oster  * A NULL qfuncs selects single-fault tolerance (parity only, no Q nodes)
    518        1.1     oster  *****************************************************************************/
    519        1.1     oster 
    520       1.29     perry void
    521       1.13     oster rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    522       1.33  christos 			     RF_DagHeader_t *dag_h, void *bp,
    523       1.33  christos 			     RF_RaidAccessFlags_t flags,
    524       1.13     oster 			     RF_AllocListElem_t *allocList,
    525       1.13     oster 			     const RF_RedFuncs_t *pfuncs,
    526       1.13     oster 			     const RF_RedFuncs_t *qfuncs)
    527        1.1     oster {
    528  1.33.88.1      yamt 	RF_DagNode_t *readDataNodes, *readParityNodes, *termNode;
    529       1.22     oster 	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
    530  1.33.88.1      yamt 	RF_DagNode_t *xorNodes, *blockNode, *commitNode;
    531  1.33.88.1      yamt 	RF_DagNode_t *writeDataNodes, *writeParityNodes;
    532  1.33.88.1      yamt 	RF_DagNode_t *tmpxorNode, *tmpwriteDataNode;
    533       1.22     oster 	RF_DagNode_t *tmpwriteParityNode;
    534       1.22     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    535  1.33.88.1      yamt 	RF_DagNode_t *tmpwriteQNode, *tmpreadQNode, *tmpqNode, *readQNodes,
    536  1.33.88.1      yamt 	     *writeQNodes, *qNodes;
    537       1.22     oster #endif
    538  1.33.88.1      yamt 	int     i, j, nNodes;
    539        1.3     oster 	RF_ReconUnitNum_t which_ru;
    540        1.3     oster 	int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
    541  1.33.88.1      yamt 	int     (*qfunc) (RF_DagNode_t *) __unused;
    542        1.3     oster 	int     numDataNodes, numParityNodes;
    543        1.3     oster 	RF_StripeNum_t parityStripeID;
    544        1.3     oster 	RF_PhysDiskAddr_t *pda;
    545  1.33.88.1      yamt 	const char *name, *qname __unused;
    546        1.3     oster 	long    nfaults;
    547        1.3     oster 
    548        1.3     oster 	nfaults = qfuncs ? 2 : 1;
    549        1.3     oster 
    550        1.3     oster 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
    551        1.3     oster 	    asmap->raidAddress, &which_ru);
    552        1.3     oster 	pda = asmap->physInfo;
    553        1.3     oster 	numDataNodes = asmap->numStripeUnitsAccessed;
    554        1.3     oster 	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
    555        1.3     oster 
    556       1.19     oster #if RF_DEBUG_DAG
    557        1.3     oster 	if (rf_dagDebug) {
    558        1.3     oster 		printf("[Creating small-write DAG]\n");
    559        1.3     oster 	}
    560       1.19     oster #endif
    561        1.3     oster 	RF_ASSERT(numDataNodes > 0);
    562        1.3     oster 	dag_h->creator = "SmallWriteDAG";
    563        1.3     oster 
    564        1.3     oster 	dag_h->numCommitNodes = 1;
    565        1.3     oster 	dag_h->numCommits = 0;
    566        1.3     oster 	dag_h->numSuccedents = 1;
    567        1.3     oster 
    568        1.3     oster 	/*
    569        1.3     oster          * DAG creation occurs in four steps:
    570        1.3     oster          * 1. count the number of nodes in the DAG
    571        1.3     oster          * 2. create the nodes
    572        1.3     oster          * 3. initialize the nodes
    573        1.3     oster          * 4. connect the nodes
    574        1.3     oster          */
    575        1.3     oster 
    576        1.3     oster 	/*
    577        1.3     oster          * Step 1. compute number of nodes in the graph
    578        1.3     oster          */
    579        1.3     oster 
    580       1.14     oster 	/* number of nodes: a read and write for each data unit; a
    581       1.14     oster 	 * redundancy computation node for each parity node (nfaults *
    582       1.14     oster 	 * nparity); a read and write for each parity unit; a block and
    583       1.14     oster 	 * commit node (2); a terminate node; if atomic RMW, an unlock
    584  1.33.88.1      yamt 	 * node for each data unit and redundancy unit.
    585  1.33.88.1      yamt 	 * totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
    586  1.33.88.1      yamt 	 *   + (nfaults * 2 * numParityNodes) + 3;
    587  1.33.88.1      yamt 	 */
    588  1.33.88.1      yamt 
    589        1.3     oster 	/*
    590        1.3     oster          * Step 2. create the nodes
    591        1.3     oster          */
    592       1.22     oster 
    593       1.22     oster 	blockNode = rf_AllocDAGNode();
    594       1.22     oster 	blockNode->list_next = dag_h->nodes;
    595       1.22     oster 	dag_h->nodes = blockNode;
    596       1.22     oster 
    597       1.22     oster 	commitNode = rf_AllocDAGNode();
    598       1.22     oster 	commitNode->list_next = dag_h->nodes;
    599       1.22     oster 	dag_h->nodes = commitNode;
    600       1.22     oster 
    601       1.22     oster 	for (i = 0; i < numDataNodes; i++) {
    602       1.22     oster 		tmpNode = rf_AllocDAGNode();
    603       1.22     oster 		tmpNode->list_next = dag_h->nodes;
    604       1.22     oster 		dag_h->nodes = tmpNode;
    605       1.22     oster 	}
    606       1.22     oster 	readDataNodes = dag_h->nodes;
    607       1.22     oster 
    608       1.22     oster 	for (i = 0; i < numParityNodes; i++) {
    609       1.22     oster 		tmpNode = rf_AllocDAGNode();
    610       1.22     oster 		tmpNode->list_next = dag_h->nodes;
    611       1.22     oster 		dag_h->nodes = tmpNode;
    612       1.22     oster 	}
    613       1.22     oster 	readParityNodes = dag_h->nodes;
    614       1.29     perry 
    615       1.22     oster 	for (i = 0; i < numDataNodes; i++) {
    616       1.22     oster 		tmpNode = rf_AllocDAGNode();
    617       1.22     oster 		tmpNode->list_next = dag_h->nodes;
    618       1.22     oster 		dag_h->nodes = tmpNode;
    619       1.22     oster 	}
    620       1.22     oster 	writeDataNodes = dag_h->nodes;
    621       1.22     oster 
    622       1.22     oster 	for (i = 0; i < numParityNodes; i++) {
    623       1.22     oster 		tmpNode = rf_AllocDAGNode();
    624       1.22     oster 		tmpNode->list_next = dag_h->nodes;
    625       1.22     oster 		dag_h->nodes = tmpNode;
    626       1.22     oster 	}
    627       1.22     oster 	writeParityNodes = dag_h->nodes;
    628       1.22     oster 
    629       1.22     oster 	for (i = 0; i < numParityNodes; i++) {
    630       1.22     oster 		tmpNode = rf_AllocDAGNode();
    631       1.22     oster 		tmpNode->list_next = dag_h->nodes;
    632       1.22     oster 		dag_h->nodes = tmpNode;
    633       1.22     oster 	}
    634       1.22     oster 	xorNodes = dag_h->nodes;
    635       1.22     oster 
    636       1.22     oster 	termNode = rf_AllocDAGNode();
    637       1.22     oster 	termNode->list_next = dag_h->nodes;
    638       1.22     oster 	dag_h->nodes = termNode;
    639       1.16     oster 
    640       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    641        1.3     oster 	if (nfaults == 2) {
    642       1.22     oster 		for (i = 0; i < numParityNodes; i++) {
    643       1.22     oster 			tmpNode = rf_AllocDAGNode();
    644       1.22     oster 			tmpNode->list_next = dag_h->nodes;
    645       1.22     oster 			dag_h->nodes = tmpNode;
    646       1.22     oster 		}
    647       1.22     oster 		readQNodes = dag_h->nodes;
    648       1.22     oster 
    649       1.22     oster 		for (i = 0; i < numParityNodes; i++) {
    650       1.22     oster 			tmpNode = rf_AllocDAGNode();
    651       1.22     oster 			tmpNode->list_next = dag_h->nodes;
    652       1.22     oster 			dag_h->nodes = tmpNode;
    653       1.22     oster 		}
    654       1.22     oster 		writeQNodes = dag_h->nodes;
    655       1.22     oster 
    656       1.22     oster 		for (i = 0; i < numParityNodes; i++) {
    657       1.22     oster 			tmpNode = rf_AllocDAGNode();
    658       1.22     oster 			tmpNode->list_next = dag_h->nodes;
    659       1.22     oster 			dag_h->nodes = tmpNode;
    660       1.22     oster 		}
    661       1.22     oster 		qNodes = dag_h->nodes;
    662        1.3     oster 	} else {
    663       1.18     oster 		readQNodes = writeQNodes = qNodes = NULL;
    664        1.3     oster 	}
    665       1.20     oster #endif
    666        1.3     oster 
    667        1.3     oster 	/*
    668        1.3     oster          * Step 3. initialize the nodes
    669        1.3     oster          */
    670        1.3     oster 	/* initialize block node (Nil) */
    671        1.3     oster 	nNodes = numDataNodes + (nfaults * numParityNodes);
    672       1.29     perry 	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    673       1.29     perry 		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
    674       1.14     oster 		    dag_h, "Nil", allocList);
    675        1.3     oster 
    676        1.3     oster 	/* initialize commit node (Cmt) */
    677       1.29     perry 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
    678       1.29     perry 		    rf_NullNodeUndoFunc, NULL, nNodes,
    679       1.14     oster 		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
    680        1.3     oster 
    681        1.3     oster 	/* initialize terminate node (Trm) */
    682       1.29     perry 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
    683       1.29     perry 		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
    684       1.14     oster 		    dag_h, "Trm", allocList);
    685        1.3     oster 
    686        1.3     oster 	/* initialize nodes which read old data (Rod) */
    687       1.22     oster 	tmpreadDataNode = readDataNodes;
    688        1.3     oster 	for (i = 0; i < numDataNodes; i++) {
    689       1.29     perry 		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
    690       1.14     oster 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    691       1.29     perry 			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
    692       1.14     oster 			    1, 4, 0, dag_h, "Rod", allocList);
    693        1.3     oster 		RF_ASSERT(pda != NULL);
    694        1.3     oster 		/* physical disk addr desc */
    695       1.22     oster 		tmpreadDataNode->params[0].p = pda;
    696        1.3     oster 		/* buffer to hold old data */
    697       1.27     oster 		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
    698       1.22     oster 		tmpreadDataNode->params[2].v = parityStripeID;
    699       1.22     oster 		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    700       1.17     oster 		    which_ru);
    701        1.3     oster 		pda = pda->next;
    702       1.22     oster 		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
    703       1.22     oster 			tmpreadDataNode->propList[j] = NULL;
    704        1.3     oster 		}
    705       1.22     oster 		tmpreadDataNode = tmpreadDataNode->list_next;
    706        1.3     oster 	}
    707        1.3     oster 
    708        1.3     oster 	/* initialize nodes which read old parity (Rop) */
    709        1.3     oster 	pda = asmap->parityInfo;
    710        1.3     oster 	i = 0;
    711       1.22     oster 	tmpreadParityNode = readParityNodes;
    712        1.3     oster 	for (i = 0; i < numParityNodes; i++) {
    713        1.3     oster 		RF_ASSERT(pda != NULL);
    714       1.29     perry 		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
    715       1.14     oster 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    716       1.29     perry 			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
    717       1.14     oster 			    dag_h, "Rop", allocList);
    718       1.22     oster 		tmpreadParityNode->params[0].p = pda;
    719        1.3     oster 		/* buffer to hold old parity */
    720       1.27     oster 		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
    721       1.22     oster 		tmpreadParityNode->params[2].v = parityStripeID;
    722       1.22     oster 		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    723       1.17     oster 		    which_ru);
    724        1.3     oster 		pda = pda->next;
    725       1.22     oster 		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
    726       1.22     oster 			tmpreadParityNode->propList[0] = NULL;
    727        1.3     oster 		}
    728       1.22     oster 		tmpreadParityNode = tmpreadParityNode->list_next;
    729        1.3     oster 	}
    730        1.3     oster 
    731       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    732        1.3     oster 	/* initialize nodes which read old Q (Roq) */
    733        1.3     oster 	if (nfaults == 2) {
    734        1.3     oster 		pda = asmap->qInfo;
    735       1.22     oster 		tmpreadQNode = readQNodes;
    736        1.3     oster 		for (i = 0; i < numParityNodes; i++) {
    737        1.3     oster 			RF_ASSERT(pda != NULL);
    738       1.29     perry 			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
    739       1.14     oster 				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
    740       1.29     perry 				    rf_GenericWakeupFunc, numParityNodes,
    741       1.14     oster 				    1, 4, 0, dag_h, "Roq", allocList);
    742       1.22     oster 			tmpreadQNode->params[0].p = pda;
    743        1.3     oster 			/* buffer to hold old Q */
    744       1.24     oster 			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
    745       1.24     oster 								   pda->numSector << raidPtr->logBytesPerSector);
    746       1.22     oster 			tmpreadQNode->params[2].v = parityStripeID;
    747       1.22     oster 			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    748       1.17     oster 			    which_ru);
    749        1.3     oster 			pda = pda->next;
    750       1.22     oster 			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
    751       1.22     oster 				tmpreadQNode->propList[0] = NULL;
    752        1.3     oster 			}
    753       1.22     oster 			tmpreadQNode = tmpreadQNode->list_next;
    754        1.3     oster 		}
    755        1.3     oster 	}
    756       1.20     oster #endif
    757        1.3     oster 	/* initialize nodes which write new data (Wnd) */
    758        1.3     oster 	pda = asmap->physInfo;
    759       1.22     oster 	tmpwriteDataNode = writeDataNodes;
    760        1.3     oster 	for (i = 0; i < numDataNodes; i++) {
    761        1.3     oster 		RF_ASSERT(pda != NULL);
    762       1.29     perry 		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
    763       1.29     perry 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    764       1.14     oster 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    765       1.14     oster 			    "Wnd", allocList);
    766        1.3     oster 		/* physical disk addr desc */
    767       1.22     oster 		tmpwriteDataNode->params[0].p = pda;
    768        1.3     oster 		/* buffer holding new data to be written */
    769       1.22     oster 		tmpwriteDataNode->params[1].p = pda->bufPtr;
    770       1.22     oster 		tmpwriteDataNode->params[2].v = parityStripeID;
    771       1.22     oster 		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    772       1.17     oster 		    which_ru);
    773        1.3     oster 		pda = pda->next;
    774       1.22     oster 		tmpwriteDataNode = tmpwriteDataNode->list_next;
    775        1.3     oster 	}
    776        1.3     oster 
    777        1.3     oster 	/*
    778        1.3     oster          * Initialize nodes which compute new parity and Q.
    779        1.3     oster          */
    780        1.3     oster 	/*
    781        1.3     oster          * We use the simple XOR func in the double-XOR case, and when
    782       1.14     oster          * we're accessing only a portion of one stripe unit. The
    783       1.14     oster          * distinction between the two is that the regular XOR func
    784       1.14     oster          * assumes that the targbuf is a full SU in size, and examines
    785       1.14     oster          * the pda associated with the buffer to decide where within
    786       1.14     oster          * the buffer to XOR the data, whereas the simple XOR func
    787       1.14     oster          * just XORs the data into the start of the buffer.  */
    788        1.3     oster 	if ((numParityNodes == 2) || ((numDataNodes == 1)
    789       1.29     perry 		&& (asmap->totalSectorsAccessed <
    790       1.14     oster 		    raidPtr->Layout.sectorsPerStripeUnit))) {
    791        1.3     oster 		func = pfuncs->simple;
    792        1.3     oster 		undoFunc = rf_NullNodeUndoFunc;
    793        1.3     oster 		name = pfuncs->SimpleName;
    794        1.3     oster 		if (qfuncs) {
    795        1.3     oster 			qfunc = qfuncs->simple;
    796        1.3     oster 			qname = qfuncs->SimpleName;
    797        1.3     oster 		} else {
    798        1.3     oster 			qfunc = NULL;
    799        1.3     oster 			qname = NULL;
    800        1.3     oster 		}
    801        1.3     oster 	} else {
    802        1.3     oster 		func = pfuncs->regular;
    803        1.3     oster 		undoFunc = rf_NullNodeUndoFunc;
    804        1.3     oster 		name = pfuncs->RegularName;
    805        1.3     oster 		if (qfuncs) {
    806        1.3     oster 			qfunc = qfuncs->regular;
    807        1.3     oster 			qname = qfuncs->RegularName;
    808        1.3     oster 		} else {
    809        1.3     oster 			qfunc = NULL;
    810        1.3     oster 			qname = NULL;
    811        1.3     oster 		}
    812        1.3     oster 	}
    813        1.3     oster 	/*
    814        1.3     oster          * Initialize the xor nodes: params are {pda,buf}
    815        1.3     oster          * from {Rod,Wnd,Rop} nodes, and raidPtr
    816        1.3     oster          */
    817        1.3     oster 	if (numParityNodes == 2) {
    818        1.3     oster 		/* double-xor case */
    819       1.22     oster 		tmpxorNode = xorNodes;
    820       1.22     oster 		tmpreadDataNode = readDataNodes;
    821       1.22     oster 		tmpreadParityNode = readParityNodes;
    822       1.22     oster 		tmpwriteDataNode = writeDataNodes;
    823  1.33.88.1      yamt #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    824       1.22     oster 		tmpqNode = qNodes;
    825       1.22     oster 		tmpreadQNode = readQNodes;
    826  1.33.88.1      yamt #endif
    827        1.3     oster 		for (i = 0; i < numParityNodes; i++) {
    828        1.3     oster 			/* note: no wakeup func for xor */
    829       1.29     perry 			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
    830       1.29     perry 				    undoFunc, NULL, 1,
    831       1.29     perry 				    (numDataNodes + numParityNodes),
    832       1.14     oster 				    7, 1, dag_h, name, allocList);
    833       1.22     oster 			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
    834       1.22     oster 			tmpxorNode->params[0] = tmpreadDataNode->params[0];
    835       1.22     oster 			tmpxorNode->params[1] = tmpreadDataNode->params[1];
    836       1.22     oster 			tmpxorNode->params[2] = tmpreadParityNode->params[0];
    837       1.22     oster 			tmpxorNode->params[3] = tmpreadParityNode->params[1];
    838       1.22     oster 			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
    839       1.22     oster 			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
    840       1.22     oster 			tmpxorNode->params[6].p = raidPtr;
    841        1.3     oster 			/* use old parity buf as target buf */
    842       1.22     oster 			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
    843       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    844        1.3     oster 			if (nfaults == 2) {
    845        1.3     oster 				/* note: no wakeup func for qor */
    846       1.29     perry 				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
    847       1.14     oster 					    qfunc, undoFunc, NULL, 1,
    848       1.29     perry 					    (numDataNodes + numParityNodes),
    849       1.14     oster 					    7, 1, dag_h, qname, allocList);
    850       1.22     oster 				tmpqNode->params[0] = tmpreadDataNode->params[0];
    851       1.22     oster 				tmpqNode->params[1] = tmpreadDataNode->params[1];
    852       1.22     oster 				tmpqNode->params[2] = tmpreadQNode->.params[0];
    853       1.22     oster 				tmpqNode->params[3] = tmpreadQNode->params[1];
    854       1.22     oster 				tmpqNode->params[4] = tmpwriteDataNode->params[0];
    855       1.22     oster 				tmpqNode->params[5] = tmpwriteDataNode->params[1];
    856       1.22     oster 				tmpqNode->params[6].p = raidPtr;
    857        1.3     oster 				/* use old Q buf as target buf */
    858       1.22     oster 				tmpqNode->results[0] = tmpreadQNode->params[1].p;
    859       1.22     oster 				tmpqNode = tmpqNode->list_next;
    860       1.22     oster 				tmpreadQNodes = tmpreadQNodes->list_next;
    861        1.3     oster 			}
    862       1.20     oster #endif
    863       1.22     oster 			tmpxorNode = tmpxorNode->list_next;
    864       1.22     oster 			tmpreadDataNode = tmpreadDataNode->list_next;
    865       1.22     oster 			tmpreadParityNode = tmpreadParityNode->list_next;
    866       1.22     oster 			tmpwriteDataNode = tmpwriteDataNode->list_next;
    867        1.3     oster 		}
    868        1.3     oster 	} else {
    869        1.3     oster 		/* there is only one xor node in this case */
    870       1.29     perry 		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
    871       1.14     oster 			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
    872       1.29     perry 			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
    873       1.14     oster 			    dag_h, name, allocList);
    874       1.22     oster 		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
    875       1.22     oster 		tmpreadDataNode = readDataNodes;
    876       1.29     perry 		for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
    877       1.22     oster 							out the "+1" into the "deal with Rop separately below */
    878       1.22     oster 			/* set up params related to Rod nodes */
    879       1.22     oster 			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
    880       1.22     oster 			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
    881       1.22     oster 			tmpreadDataNode = tmpreadDataNode->list_next;
    882       1.22     oster 		}
    883       1.22     oster 		/* deal with Rop separately */
    884       1.22     oster 		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
    885       1.22     oster 		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
    886       1.22     oster 
    887       1.22     oster 		tmpwriteDataNode = writeDataNodes;
    888        1.3     oster 		for (i = 0; i < numDataNodes; i++) {
    889        1.3     oster 			/* set up params related to Wnd and Wnp nodes */
    890       1.22     oster 			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
    891       1.22     oster 			    tmpwriteDataNode->params[0];
    892       1.22     oster 			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
    893       1.22     oster 			    tmpwriteDataNode->params[1];
    894       1.22     oster 			tmpwriteDataNode = tmpwriteDataNode->list_next;
    895        1.3     oster 		}
    896        1.3     oster 		/* xor node needs to get at RAID information */
    897       1.22     oster 		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
    898       1.22     oster 		xorNodes->results[0] = readParityNodes->params[1].p;
    899       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    900        1.3     oster 		if (nfaults == 2) {
    901       1.29     perry 			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
    902       1.14     oster 				    undoFunc, NULL, 1,
    903       1.14     oster 				    (numDataNodes + numParityNodes),
    904       1.14     oster 				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
    905       1.14     oster 				    dag_h, qname, allocList);
    906       1.22     oster 			tmpreadDataNode = readDataNodes;
    907        1.3     oster 			for (i = 0; i < numDataNodes; i++) {
    908        1.3     oster 				/* set up params related to Rod */
    909       1.22     oster 				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
    910       1.22     oster 				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
    911       1.22     oster 				tmpreadDataNode = tmpreadDataNode->list_next;
    912        1.3     oster 			}
    913        1.3     oster 			/* and read old q */
    914       1.22     oster 			qNodes->params[2 * numDataNodes + 0] =	/* pda */
    915       1.22     oster 			    readQNodes->params[0];
    916       1.22     oster 			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
    917       1.22     oster 			    readQNodes->params[1];
    918       1.22     oster 			tmpwriteDataNode = writeDataNodes;
    919        1.3     oster 			for (i = 0; i < numDataNodes; i++) {
    920        1.3     oster 				/* set up params related to Wnd nodes */
    921       1.22     oster 				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
    922       1.22     oster 				    tmpwriteDataNode->params[0];
    923       1.22     oster 				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
    924       1.22     oster 				    tmpwriteDataNode->params[1];
    925       1.22     oster 				tmpwriteDataNode = tmpwriteDataNode->list_next;
    926        1.3     oster 			}
    927        1.3     oster 			/* xor node needs to get at RAID information */
    928       1.22     oster 			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
    929       1.22     oster 			qNodes->results[0] = readQNodes->params[1].p;
    930        1.3     oster 		}
    931       1.20     oster #endif
    932        1.3     oster 	}
    933        1.3     oster 
    934        1.3     oster 	/* initialize nodes which write new parity (Wnp) */
    935        1.3     oster 	pda = asmap->parityInfo;
    936       1.22     oster 	tmpwriteParityNode = writeParityNodes;
    937       1.22     oster 	tmpxorNode = xorNodes;
    938        1.3     oster 	for (i = 0; i < numParityNodes; i++) {
    939       1.29     perry 		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
    940       1.14     oster 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    941       1.14     oster 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    942       1.14     oster 			    "Wnp", allocList);
    943        1.3     oster 		RF_ASSERT(pda != NULL);
    944       1.22     oster 		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
    945       1.22     oster 				  			 * filled in by xor node */
    946       1.22     oster 		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
    947       1.22     oster 				  						 * parity write
    948       1.22     oster 				  						 * operation */
    949       1.22     oster 		tmpwriteParityNode->params[2].v = parityStripeID;
    950       1.22     oster 		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    951       1.17     oster 		    which_ru);
    952        1.3     oster 		pda = pda->next;
    953       1.22     oster 		tmpwriteParityNode = tmpwriteParityNode->list_next;
    954       1.22     oster 		tmpxorNode = tmpxorNode->list_next;
    955        1.3     oster 	}
    956        1.3     oster 
    957       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
    958        1.3     oster 	/* initialize nodes which write new Q (Wnq) */
    959        1.3     oster 	if (nfaults == 2) {
    960        1.3     oster 		pda = asmap->qInfo;
    961       1.22     oster 		tmpwriteQNode = writeQNodes;
    962       1.22     oster 		tmpqNode = qNodes;
    963        1.3     oster 		for (i = 0; i < numParityNodes; i++) {
    964       1.29     perry 			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
    965       1.29     perry 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    966       1.14     oster 				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
    967       1.14     oster 				    "Wnq", allocList);
    968        1.3     oster 			RF_ASSERT(pda != NULL);
    969       1.22     oster 			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
    970        1.3     oster 								 * filled in by xor node */
    971       1.22     oster 			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
    972        1.3     oster 										 * parity write
    973        1.3     oster 										 * operation */
    974       1.22     oster 			tmpwriteQNode->params[2].v = parityStripeID;
    975       1.22     oster 			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    976       1.17     oster 			    which_ru);
    977        1.3     oster 			pda = pda->next;
    978       1.22     oster 			tmpwriteQNode = tmpwriteQNode->list_next;
    979       1.22     oster 			tmpqNode = tmpqNode->list_next;
    980        1.3     oster 		}
    981        1.3     oster 	}
    982       1.20     oster #endif
    983        1.3     oster 	/*
    984        1.3     oster          * Step 4. connect the nodes.
    985        1.3     oster          */
    986        1.3     oster 
    987        1.3     oster 	/* connect header to block node */
    988        1.3     oster 	dag_h->succedents[0] = blockNode;
    989        1.3     oster 
    990        1.3     oster 	/* connect block node to read old data nodes */
    991        1.3     oster 	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
    992       1.22     oster 	tmpreadDataNode = readDataNodes;
    993        1.3     oster 	for (i = 0; i < numDataNodes; i++) {
    994       1.22     oster 		blockNode->succedents[i] = tmpreadDataNode;
    995       1.22     oster 		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
    996       1.22     oster 		tmpreadDataNode->antecedents[0] = blockNode;
    997       1.22     oster 		tmpreadDataNode->antType[0] = rf_control;
    998       1.22     oster 		tmpreadDataNode = tmpreadDataNode->list_next;
    999        1.3     oster 	}
   1000        1.3     oster 
   1001        1.3     oster 	/* connect block node to read old parity nodes */
   1002       1.22     oster 	tmpreadParityNode = readParityNodes;
   1003        1.3     oster 	for (i = 0; i < numParityNodes; i++) {
   1004       1.22     oster 		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
   1005       1.22     oster 		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
   1006       1.22     oster 		tmpreadParityNode->antecedents[0] = blockNode;
   1007       1.22     oster 		tmpreadParityNode->antType[0] = rf_control;
   1008       1.22     oster 		tmpreadParityNode = tmpreadParityNode->list_next;
   1009        1.3     oster 	}
   1010        1.3     oster 
   1011       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1012        1.3     oster 	/* connect block node to read old Q nodes */
   1013        1.3     oster 	if (nfaults == 2) {
   1014       1.22     oster 		tmpreadQNode = readQNodes;
   1015        1.3     oster 		for (i = 0; i < numParityNodes; i++) {
   1016       1.22     oster 			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
   1017       1.22     oster 			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
   1018       1.22     oster 			tmpreadQNode->antecedents[0] = blockNode;
   1019       1.22     oster 			tmpreadQNode->antType[0] = rf_control;
   1020       1.22     oster 			tmpreadQNode = tmpreadQNode->list_next;
   1021        1.3     oster 		}
   1022        1.3     oster 	}
   1023       1.20     oster #endif
   1024        1.3     oster 	/* connect read old data nodes to xor nodes */
   1025       1.22     oster 	tmpreadDataNode = readDataNodes;
   1026        1.3     oster 	for (i = 0; i < numDataNodes; i++) {
   1027       1.22     oster 		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
   1028       1.22     oster 		tmpxorNode = xorNodes;
   1029        1.3     oster 		for (j = 0; j < numParityNodes; j++) {
   1030       1.22     oster 			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
   1031       1.22     oster 			tmpreadDataNode->succedents[j] = tmpxorNode;
   1032       1.22     oster 			tmpxorNode->antecedents[i] = tmpreadDataNode;
   1033       1.22     oster 			tmpxorNode->antType[i] = rf_trueData;
   1034       1.22     oster 			tmpxorNode = tmpxorNode->list_next;
   1035        1.3     oster 		}
   1036       1.22     oster 		tmpreadDataNode = tmpreadDataNode->list_next;
   1037        1.3     oster 	}
   1038        1.3     oster 
   1039       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1040        1.3     oster 	/* connect read old data nodes to q nodes */
   1041        1.3     oster 	if (nfaults == 2) {
   1042       1.22     oster 		tmpreadDataNode = readDataNodes;
   1043        1.3     oster 		for (i = 0; i < numDataNodes; i++) {
   1044       1.22     oster 			tmpqNode = qNodes;
   1045        1.3     oster 			for (j = 0; j < numParityNodes; j++) {
   1046       1.22     oster 				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
   1047       1.22     oster 				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
   1048       1.22     oster 				tmpqNode->antecedents[i] = tmpreadDataNode;
   1049       1.22     oster 				tmpqNode->antType[i] = rf_trueData;
   1050       1.22     oster 				tmpqNode = tmpqNode->list_next;
   1051        1.3     oster 			}
   1052       1.22     oster 			tmpreadDataNode = tmpreadDataNode->list_next;
   1053        1.3     oster 		}
   1054        1.3     oster 	}
   1055       1.20     oster #endif
   1056        1.3     oster 	/* connect read old parity nodes to xor nodes */
   1057       1.22     oster 	tmpreadParityNode = readParityNodes;
   1058        1.3     oster 	for (i = 0; i < numParityNodes; i++) {
   1059       1.22     oster 		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
   1060       1.22     oster 		tmpxorNode = xorNodes;
   1061        1.3     oster 		for (j = 0; j < numParityNodes; j++) {
   1062       1.22     oster 			tmpreadParityNode->succedents[j] = tmpxorNode;
   1063       1.22     oster 			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
   1064       1.22     oster 			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
   1065       1.22     oster 			tmpxorNode = tmpxorNode->list_next;
   1066        1.3     oster 		}
   1067       1.22     oster 		tmpreadParityNode = tmpreadParityNode->list_next;
   1068        1.3     oster 	}
   1069        1.3     oster 
   1070       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1071        1.3     oster 	/* connect read old q nodes to q nodes */
   1072        1.3     oster 	if (nfaults == 2) {
   1073       1.22     oster 		tmpreadParityNode = readParityNodes;
   1074       1.22     oster 		tmpreadQNode = readQNodes;
   1075        1.3     oster 		for (i = 0; i < numParityNodes; i++) {
   1076       1.22     oster 			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
   1077       1.22     oster 			tmpqNode = qNodes;
   1078        1.3     oster 			for (j = 0; j < numParityNodes; j++) {
   1079       1.22     oster 				tmpreadQNode->succedents[j] = tmpqNode;
   1080       1.22     oster 				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
   1081       1.22     oster 				tmpqNode->antType[numDataNodes + i] = rf_trueData;
   1082       1.22     oster 				tmpqNode = tmpqNode->list_next;
   1083        1.3     oster 			}
   1084       1.22     oster 			tmpreadParityNode = tmpreadParityNode->list_next;
   1085       1.22     oster 			tmpreadQNode = tmpreadQNode->list_next;
   1086        1.3     oster 		}
   1087        1.3     oster 	}
   1088       1.20     oster #endif
   1089        1.3     oster 	/* connect xor nodes to commit node */
   1090        1.3     oster 	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
   1091       1.22     oster 	tmpxorNode = xorNodes;
   1092        1.3     oster 	for (i = 0; i < numParityNodes; i++) {
   1093       1.22     oster 		RF_ASSERT(tmpxorNode->numSuccedents == 1);
   1094       1.22     oster 		tmpxorNode->succedents[0] = commitNode;
   1095       1.22     oster 		commitNode->antecedents[i] = tmpxorNode;
   1096        1.3     oster 		commitNode->antType[i] = rf_control;
   1097       1.22     oster 		tmpxorNode = tmpxorNode->list_next;
   1098        1.3     oster 	}
   1099        1.3     oster 
   1100       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1101        1.3     oster 	/* connect q nodes to commit node */
   1102        1.3     oster 	if (nfaults == 2) {
   1103       1.22     oster 		tmpqNode = qNodes;
   1104        1.3     oster 		for (i = 0; i < numParityNodes; i++) {
   1105       1.22     oster 			RF_ASSERT(tmpqNode->numSuccedents == 1);
   1106       1.22     oster 			tmpqNode->succedents[0] = commitNode;
   1107       1.22     oster 			commitNode->antecedents[i + numParityNodes] = tmpqNode;
   1108        1.3     oster 			commitNode->antType[i + numParityNodes] = rf_control;
   1109       1.22     oster 			tmpqNode = tmpqNode->list_next;
   1110        1.3     oster 		}
   1111        1.3     oster 	}
   1112       1.20     oster #endif
   1113        1.3     oster 	/* connect commit node to write nodes */
   1114        1.3     oster 	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
   1115       1.22     oster 	tmpwriteDataNode = writeDataNodes;
   1116        1.3     oster 	for (i = 0; i < numDataNodes; i++) {
   1117       1.28     oster 		RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
   1118       1.22     oster 		commitNode->succedents[i] = tmpwriteDataNode;
   1119       1.22     oster 		tmpwriteDataNode->antecedents[0] = commitNode;
   1120       1.22     oster 		tmpwriteDataNode->antType[0] = rf_trueData;
   1121       1.22     oster 		tmpwriteDataNode = tmpwriteDataNode->list_next;
   1122        1.3     oster 	}
   1123       1.22     oster 	tmpwriteParityNode = writeParityNodes;
   1124        1.3     oster 	for (i = 0; i < numParityNodes; i++) {
   1125       1.22     oster 		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
   1126       1.22     oster 		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
   1127       1.22     oster 		tmpwriteParityNode->antecedents[0] = commitNode;
   1128       1.22     oster 		tmpwriteParityNode->antType[0] = rf_trueData;
   1129       1.22     oster 		tmpwriteParityNode = tmpwriteParityNode->list_next;
   1130        1.3     oster 	}
   1131       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1132        1.3     oster 	if (nfaults == 2) {
   1133       1.22     oster 		tmpwriteQNode = writeQNodes;
   1134        1.3     oster 		for (i = 0; i < numParityNodes; i++) {
   1135       1.22     oster 			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
   1136       1.22     oster 			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
   1137       1.22     oster 			tmpwriteQNode->antecedents[0] = commitNode;
   1138       1.22     oster 			tmpwriteQNode->antType[0] = rf_trueData;
   1139       1.22     oster 			tmpwriteQNode = tmpwriteQNode->list_next;
   1140        1.3     oster 		}
   1141        1.3     oster 	}
   1142       1.20     oster #endif
   1143        1.3     oster 	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
   1144        1.3     oster 	RF_ASSERT(termNode->numSuccedents == 0);
   1145       1.22     oster 	tmpwriteDataNode = writeDataNodes;
   1146        1.3     oster 	for (i = 0; i < numDataNodes; i++) {
   1147       1.16     oster 		/* connect write new data nodes to term node */
   1148       1.22     oster 		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
   1149       1.16     oster 		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
   1150       1.22     oster 		tmpwriteDataNode->succedents[0] = termNode;
   1151       1.22     oster 		termNode->antecedents[i] = tmpwriteDataNode;
   1152       1.16     oster 		termNode->antType[i] = rf_control;
   1153       1.22     oster 		tmpwriteDataNode = tmpwriteDataNode->list_next;
   1154        1.3     oster 	}
   1155        1.3     oster 
   1156       1.22     oster 	tmpwriteParityNode = writeParityNodes;
   1157        1.3     oster 	for (i = 0; i < numParityNodes; i++) {
   1158       1.22     oster 		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
   1159       1.22     oster 		tmpwriteParityNode->succedents[0] = termNode;
   1160       1.22     oster 		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
   1161       1.16     oster 		termNode->antType[numDataNodes + i] = rf_control;
   1162       1.22     oster 		tmpwriteParityNode = tmpwriteParityNode->list_next;
   1163        1.3     oster 	}
   1164        1.3     oster 
   1165       1.20     oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   1166        1.3     oster 	if (nfaults == 2) {
   1167       1.22     oster 		tmpwriteQNode = writeQNodes;
   1168        1.3     oster 		for (i = 0; i < numParityNodes; i++) {
   1169       1.22     oster 			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
   1170       1.22     oster 			tmpwriteQNode->succedents[0] = termNode;
   1171       1.22     oster 			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
   1172       1.16     oster 			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
   1173       1.22     oster 			tmpwriteQNode = tmpwriteQNode->list_next;
   1174        1.3     oster 		}
   1175        1.3     oster 	}
   1176       1.20     oster #endif
   1177        1.1     oster }
   1178        1.1     oster 
   1179        1.1     oster 
   1180        1.1     oster /******************************************************************************
   1181        1.1     oster  * create a write graph (fault-free or degraded) for RAID level 1
   1182        1.1     oster  *
   1183        1.1     oster  * Hdr -> Commit -> Wpd -> Nil -> Trm
   1184        1.1     oster  *               -> Wsd ->
   1185        1.1     oster  *
   1186        1.1     oster  * The "Wpd" node writes data to the primary copy in the mirror pair
   1187        1.1     oster  * The "Wsd" node writes data to the secondary copy in the mirror pair
   1188        1.1     oster  *
   1189        1.1     oster  * Parameters:  raidPtr   - description of the physical array
   1190        1.1     oster  *              asmap     - logical & physical addresses for this access
   1191        1.1     oster  *              bp        - buffer ptr (holds write data)
   1192        1.3     oster  *              flags     - general flags (e.g. disk locking)
   1193        1.1     oster  *              allocList - list of memory allocated in DAG creation
   1194        1.1     oster  *****************************************************************************/
   1195        1.1     oster 
   1196       1.29     perry void
   1197       1.13     oster rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
   1198       1.33  christos 			 RF_DagHeader_t *dag_h, void *bp,
   1199       1.33  christos 			 RF_RaidAccessFlags_t flags,
   1200       1.13     oster 			 RF_AllocListElem_t *allocList)
   1201        1.1     oster {
   1202        1.3     oster 	RF_DagNode_t *unblockNode, *termNode, *commitNode;
   1203       1.22     oster 	RF_DagNode_t *wndNode, *wmirNode;
   1204       1.22     oster 	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
   1205        1.3     oster 	int     nWndNodes, nWmirNodes, i;
   1206        1.3     oster 	RF_ReconUnitNum_t which_ru;
   1207        1.3     oster 	RF_PhysDiskAddr_t *pda, *pdaP;
   1208        1.3     oster 	RF_StripeNum_t parityStripeID;
   1209        1.3     oster 
   1210        1.3     oster 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
   1211        1.3     oster 	    asmap->raidAddress, &which_ru);
   1212       1.19     oster #if RF_DEBUG_DAG
   1213        1.3     oster 	if (rf_dagDebug) {
   1214        1.3     oster 		printf("[Creating RAID level 1 write DAG]\n");
   1215        1.3     oster 	}
   1216       1.19     oster #endif
   1217        1.3     oster 	dag_h->creator = "RaidOneWriteDAG";
   1218        1.3     oster 
   1219        1.3     oster 	/* 2 implies access not SU aligned */
   1220        1.3     oster 	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
   1221        1.3     oster 	nWndNodes = (asmap->physInfo->next) ? 2 : 1;
   1222        1.3     oster 
   1223        1.3     oster 	/* alloc the Wnd nodes and the Wmir node */
   1224        1.3     oster 	if (asmap->numDataFailed == 1)
   1225        1.3     oster 		nWndNodes--;
   1226        1.3     oster 	if (asmap->numParityFailed == 1)
   1227        1.3     oster 		nWmirNodes--;
   1228        1.3     oster 
   1229        1.3     oster 	/* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
   1230        1.3     oster 	 * + terminator) */
   1231       1.22     oster 	for (i = 0; i < nWndNodes; i++) {
   1232       1.22     oster 		tmpNode = rf_AllocDAGNode();
   1233       1.22     oster 		tmpNode->list_next = dag_h->nodes;
   1234       1.22     oster 		dag_h->nodes = tmpNode;
   1235       1.22     oster 	}
   1236       1.22     oster 	wndNode = dag_h->nodes;
   1237       1.22     oster 
   1238       1.22     oster 	for (i = 0; i < nWmirNodes; i++) {
   1239       1.22     oster 		tmpNode = rf_AllocDAGNode();
   1240       1.22     oster 		tmpNode->list_next = dag_h->nodes;
   1241       1.22     oster 		dag_h->nodes = tmpNode;
   1242       1.22     oster 	}
   1243       1.22     oster 	wmirNode = dag_h->nodes;
   1244       1.22     oster 
   1245       1.22     oster 	commitNode = rf_AllocDAGNode();
   1246       1.22     oster 	commitNode->list_next = dag_h->nodes;
   1247       1.22     oster 	dag_h->nodes = commitNode;
   1248       1.22     oster 
   1249       1.22     oster 	unblockNode = rf_AllocDAGNode();
   1250       1.22     oster 	unblockNode->list_next = dag_h->nodes;
   1251       1.22     oster 	dag_h->nodes = unblockNode;
   1252       1.22     oster 
   1253       1.22     oster 	termNode = rf_AllocDAGNode();
   1254       1.22     oster 	termNode->list_next = dag_h->nodes;
   1255       1.22     oster 	dag_h->nodes = termNode;
   1256        1.3     oster 
   1257        1.3     oster 	/* this dag can commit immediately */
   1258        1.3     oster 	dag_h->numCommitNodes = 1;
   1259        1.3     oster 	dag_h->numCommits = 0;
   1260        1.3     oster 	dag_h->numSuccedents = 1;
   1261        1.3     oster 
   1262        1.3     oster 	/* initialize the commit, unblock, and term nodes */
   1263       1.29     perry 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
   1264       1.29     perry 		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
   1265       1.14     oster 		    0, 0, 0, dag_h, "Cmt", allocList);
   1266       1.29     perry 	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
   1267       1.29     perry 		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
   1268       1.14     oster 		    0, 0, dag_h, "Nil", allocList);
   1269       1.29     perry 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
   1270       1.29     perry 		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
   1271       1.14     oster 		    dag_h, "Trm", allocList);
   1272        1.3     oster 
   1273        1.3     oster 	/* initialize the wnd nodes */
   1274        1.3     oster 	if (nWndNodes > 0) {
   1275        1.3     oster 		pda = asmap->physInfo;
   1276       1.22     oster 		tmpwndNode = wndNode;
   1277        1.3     oster 		for (i = 0; i < nWndNodes; i++) {
   1278       1.29     perry 			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
   1279       1.14     oster 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
   1280       1.29     perry 				    rf_GenericWakeupFunc, 1, 1, 4, 0,
   1281       1.14     oster 				    dag_h, "Wpd", allocList);
   1282        1.3     oster 			RF_ASSERT(pda != NULL);
   1283       1.22     oster 			tmpwndNode->params[0].p = pda;
   1284       1.22     oster 			tmpwndNode->params[1].p = pda->bufPtr;
   1285       1.22     oster 			tmpwndNode->params[2].v = parityStripeID;
   1286       1.22     oster 			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
   1287        1.3     oster 			pda = pda->next;
   1288       1.22     oster 			tmpwndNode = tmpwndNode->list_next;
   1289        1.3     oster 		}
   1290        1.3     oster 		RF_ASSERT(pda == NULL);
   1291        1.3     oster 	}
   1292        1.3     oster 	/* initialize the mirror nodes */
   1293        1.3     oster 	if (nWmirNodes > 0) {
   1294        1.3     oster 		pda = asmap->physInfo;
   1295        1.3     oster 		pdaP = asmap->parityInfo;
   1296       1.22     oster 		tmpwmirNode = wmirNode;
   1297        1.3     oster 		for (i = 0; i < nWmirNodes; i++) {
   1298       1.29     perry 			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
   1299       1.14     oster 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
   1300       1.29     perry 				    rf_GenericWakeupFunc, 1, 1, 4, 0,
   1301       1.14     oster 				    dag_h, "Wsd", allocList);
   1302        1.3     oster 			RF_ASSERT(pda != NULL);
   1303       1.22     oster 			tmpwmirNode->params[0].p = pdaP;
   1304       1.22     oster 			tmpwmirNode->params[1].p = pda->bufPtr;
   1305       1.22     oster 			tmpwmirNode->params[2].v = parityStripeID;
   1306       1.22     oster 			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
   1307        1.3     oster 			pda = pda->next;
   1308        1.3     oster 			pdaP = pdaP->next;
   1309       1.22     oster 			tmpwmirNode = tmpwmirNode->list_next;
   1310        1.3     oster 		}
   1311        1.3     oster 		RF_ASSERT(pda == NULL);
   1312        1.3     oster 		RF_ASSERT(pdaP == NULL);
   1313        1.3     oster 	}
   1314        1.3     oster 	/* link the header node to the commit node */
   1315        1.3     oster 	RF_ASSERT(dag_h->numSuccedents == 1);
   1316        1.3     oster 	RF_ASSERT(commitNode->numAntecedents == 0);
   1317        1.3     oster 	dag_h->succedents[0] = commitNode;
   1318        1.3     oster 
   1319        1.3     oster 	/* link the commit node to the write nodes */
   1320        1.3     oster 	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
   1321       1.22     oster 	tmpwndNode = wndNode;
   1322        1.3     oster 	for (i = 0; i < nWndNodes; i++) {
   1323       1.22     oster 		RF_ASSERT(tmpwndNode->numAntecedents == 1);
   1324       1.22     oster 		commitNode->succedents[i] = tmpwndNode;
   1325       1.22     oster 		tmpwndNode->antecedents[0] = commitNode;
   1326       1.22     oster 		tmpwndNode->antType[0] = rf_control;
   1327       1.22     oster 		tmpwndNode = tmpwndNode->list_next;
   1328        1.3     oster 	}
   1329       1.22     oster 	tmpwmirNode = wmirNode;
   1330        1.3     oster 	for (i = 0; i < nWmirNodes; i++) {
   1331       1.22     oster 		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
   1332       1.22     oster 		commitNode->succedents[i + nWndNodes] = tmpwmirNode;
   1333       1.22     oster 		tmpwmirNode->antecedents[0] = commitNode;
   1334       1.22     oster 		tmpwmirNode->antType[0] = rf_control;
   1335       1.22     oster 		tmpwmirNode = tmpwmirNode->list_next;
   1336        1.3     oster 	}
   1337        1.3     oster 
   1338        1.3     oster 	/* link the write nodes to the unblock node */
   1339        1.3     oster 	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
   1340       1.22     oster 	tmpwndNode = wndNode;
   1341        1.3     oster 	for (i = 0; i < nWndNodes; i++) {
   1342       1.22     oster 		RF_ASSERT(tmpwndNode->numSuccedents == 1);
   1343       1.22     oster 		tmpwndNode->succedents[0] = unblockNode;
   1344       1.22     oster 		unblockNode->antecedents[i] = tmpwndNode;
   1345        1.3     oster 		unblockNode->antType[i] = rf_control;
   1346       1.22     oster 		tmpwndNode = tmpwndNode->list_next;
   1347        1.3     oster 	}
   1348       1.22     oster 	tmpwmirNode = wmirNode;
   1349        1.3     oster 	for (i = 0; i < nWmirNodes; i++) {
   1350       1.22     oster 		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
   1351       1.22     oster 		tmpwmirNode->succedents[0] = unblockNode;
   1352       1.22     oster 		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
   1353        1.3     oster 		unblockNode->antType[i + nWndNodes] = rf_control;
   1354       1.22     oster 		tmpwmirNode = tmpwmirNode->list_next;
   1355        1.3     oster 	}
   1356        1.3     oster 
   1357        1.3     oster 	/* link the unblock node to the term node */
   1358        1.3     oster 	RF_ASSERT(unblockNode->numSuccedents == 1);
   1359        1.3     oster 	RF_ASSERT(termNode->numAntecedents == 1);
   1360        1.3     oster 	RF_ASSERT(termNode->numSuccedents == 0);
   1361        1.3     oster 	unblockNode->succedents[0] = termNode;
   1362        1.3     oster 	termNode->antecedents[0] = unblockNode;
   1363        1.3     oster 	termNode->antType[0] = rf_control;
   1364        1.1     oster }
   1365