Home | History | Annotate | Line # | Download | only in raidframe
rf_pqdegdags.c revision 1.10
      1  1.10    perry /*	$NetBSD: rf_pqdegdags.c,v 1.10 2005/02/27 00:27:45 perry Exp $	*/
      2   1.1    oster /*
      3   1.1    oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4   1.1    oster  * All rights reserved.
      5   1.1    oster  *
      6   1.1    oster  * Author: Daniel Stodolsky
      7   1.1    oster  *
      8   1.1    oster  * Permission to use, copy, modify and distribute this software and
      9   1.1    oster  * its documentation is hereby granted, provided that both the copyright
     10   1.1    oster  * notice and this permission notice appear in all copies of the
     11   1.1    oster  * software, derivative works or modified versions, and any portions
     12   1.1    oster  * thereof, and that both notices appear in supporting documentation.
     13   1.1    oster  *
     14   1.1    oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15   1.1    oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16   1.1    oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17   1.1    oster  *
     18   1.1    oster  * Carnegie Mellon requests users of this software to return to
     19   1.1    oster  *
     20   1.1    oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21   1.1    oster  *  School of Computer Science
     22   1.1    oster  *  Carnegie Mellon University
     23   1.1    oster  *  Pittsburgh PA 15213-3890
     24   1.1    oster  *
     25   1.1    oster  * any improvements or extensions that they make and grant Carnegie the
     26   1.1    oster  * rights to redistribute these changes.
     27   1.1    oster  */
     28   1.1    oster 
     29   1.1    oster /*
     30   1.1    oster  * rf_pqdegdags.c
     31   1.3    oster  * Degraded mode dags for double fault cases.
     32   1.1    oster */
     33   1.1    oster 
     34   1.8    lukem 
     35   1.8    lukem #include <sys/cdefs.h>
     36  1.10    perry __KERNEL_RCSID(0, "$NetBSD: rf_pqdegdags.c,v 1.10 2005/02/27 00:27:45 perry Exp $");
     37   1.1    oster 
     38   1.1    oster #include "rf_archs.h"
     39   1.1    oster 
     40   1.1    oster #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
     41   1.1    oster 
     42   1.7    oster #include <dev/raidframe/raidframevar.h>
     43   1.7    oster 
     44   1.1    oster #include "rf_raid.h"
     45   1.1    oster #include "rf_dag.h"
     46   1.5    oster #include "rf_dagdegrd.h"
     47   1.5    oster #include "rf_dagdegwr.h"
     48   1.1    oster #include "rf_dagfuncs.h"
     49   1.1    oster #include "rf_dagutils.h"
     50   1.1    oster #include "rf_etimer.h"
     51   1.1    oster #include "rf_acctrace.h"
     52   1.1    oster #include "rf_general.h"
     53   1.1    oster #include "rf_pqdegdags.h"
     54   1.1    oster #include "rf_pq.h"
     55   1.1    oster 
     56  1.10    perry static void
     57   1.3    oster applyPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, RF_PhysDiskAddr_t * ppda,
     58   1.3    oster     RF_PhysDiskAddr_t * qpda, void *bp);
     59   1.1    oster 
     60   1.1    oster /*
     61   1.3    oster    Two data drives have failed, and we are doing a read that covers one of them.
     62   1.3    oster    We may also be reading some of the surviving drives.
     63   1.3    oster 
     64   1.1    oster 
     65   1.1    oster  *****************************************************************************************
     66   1.1    oster  *
     67   1.1    oster  * creates a DAG to perform a degraded-mode read of data within one stripe.
     68   1.1    oster  * This DAG is as follows:
     69   1.1    oster  *
     70   1.1    oster  *                                      Hdr
     71   1.1    oster  *                                       |
     72   1.1    oster  *                                     Block
     73   1.1    oster  *                       /         /           \         \     \   \
     74   1.1    oster  *                      Rud  ...  Rud         Rrd  ...  Rrd    Rp  Rq
     75   1.1    oster  *                      | \       | \         | \       | \    | \ | \
     76   1.1    oster  *
     77   1.1    oster  *                                 |                 |
     78   1.1    oster  *                              Unblock              X
     79   1.1    oster  *                                  \               /
     80   1.1    oster  *                                   ------ T ------
     81   1.1    oster  *
     82   1.1    oster  * Each R node is a successor of the L node
     83   1.1    oster  * One successor arc from each R node goes to U, and the other to X
     84   1.1    oster  * There is one Rud for each chunk of surviving user data requested by the user,
     85   1.1    oster  * and one Rrd for each chunk of surviving user data _not_ being read by the user
     86   1.1    oster  * R = read, ud = user data, rd = recovery (surviving) data, p = P data, q = Qdata
     87   1.1    oster  * X = pq recovery node, T = terminate
     88   1.1    oster  *
     89   1.1    oster  * The block & unblock nodes are leftovers from a previous version.  They
     90   1.1    oster  * do nothing, but I haven't deleted them because it would be a tremendous
     91   1.1    oster  * effort to put them back in.
     92   1.1    oster  *
     93   1.1    oster  * Note:  The target buffer for the XOR node is set to the actual user buffer where the
     94   1.1    oster  * failed data is supposed to end up.  This buffer is zero'd by the code here.  Thus,
     95   1.1    oster  * if you create a degraded read dag, use it, and then re-use, you have to be sure to
     96   1.1    oster  * zero the target buffer prior to the re-use.
     97   1.1    oster  *
     98   1.1    oster  * Every buffer read is passed to the pq recovery node, whose job it is to sort out what's
     99   1.1    oster  * needed and what's not.
    100   1.1    oster  ****************************************************************************************/
/*
 * Initialise a disk-read node with 2 successors and one predecessor.
 *
 * Expands in the caller's scope and relies on the names dag_h, allocList,
 * unblockNode, recoveryNode and blockNode being visible there.
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct under an unbraced if/else.  Use with a trailing semicolon.
 */
#define INIT_DISK_NODE(node,name) \
do { \
	rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \
	(node)->succedents[0] = unblockNode; \
	(node)->succedents[1] = recoveryNode; \
	(node)->antecedents[0] = blockNode; \
	(node)->antType[0] = rf_control; \
} while (0)

/*
 * Fill in the four parameters of a disk node from the pda _p_.
 *
 * _node_ is a node object (not a pointer).  _p_ is evaluated twice, so it
 * must not have side effects.  Relies on parityStripeID and which_ru being
 * visible at the point of use.
 */
#define DISK_NODE_PARAMS(_node_,_p_) \
do { \
	(_node_).params[0].p = (_p_); \
	(_node_).params[1].p = (_p_)->bufPtr; \
	(_node_).params[2].v = parityStripeID; \
	(_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); \
} while (0)

/* The pda stored as parameter 0 of a disk node (node is a pointer). */
#define DISK_NODE_PDA(node)  ((node)->params[0].p)
    116   1.1    oster 
    117   1.1    oster RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead)
    118   1.1    oster {
    119   1.3    oster 	rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList,
    120   1.3    oster 	    "Rq", "PQ Recovery", rf_PQDoubleRecoveryFunc);
    121   1.1    oster }
    122   1.3    oster 
    123  1.10    perry static void
    124   1.3    oster applyPDA(raidPtr, pda, ppda, qpda, bp)
    125   1.3    oster 	RF_Raid_t *raidPtr;
    126   1.3    oster 	RF_PhysDiskAddr_t *pda;
    127   1.3    oster 	RF_PhysDiskAddr_t *ppda;
    128   1.3    oster 	RF_PhysDiskAddr_t *qpda;
    129   1.3    oster 	void   *bp;
    130   1.1    oster {
    131   1.3    oster 	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    132   1.3    oster 	RF_RaidAddr_t s0off = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
    133   1.3    oster 	RF_SectorCount_t s0len = ppda->numSector, len;
    134   1.3    oster 	RF_SectorNum_t suoffset;
    135   1.3    oster 	unsigned coeff;
    136   1.3    oster 	char   *pbuf = ppda->bufPtr;
    137   1.3    oster 	char   *qbuf = qpda->bufPtr;
    138   1.3    oster 	char   *buf;
    139   1.3    oster 	int     delta;
    140   1.3    oster 
    141   1.3    oster 	suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    142   1.3    oster 	len = pda->numSector;
    143   1.3    oster 	/* see if pda intersects a recovery pda */
    144   1.3    oster 	if ((suoffset < s0off + s0len) && (suoffset + len > s0off)) {
    145   1.3    oster 		buf = pda->bufPtr;
    146   1.3    oster 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
    147   1.3    oster 		coeff = (coeff % raidPtr->Layout.numDataCol);
    148   1.3    oster 
    149   1.3    oster 		if (suoffset < s0off) {
    150   1.3    oster 			delta = s0off - suoffset;
    151   1.3    oster 			buf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta);
    152   1.3    oster 			suoffset = s0off;
    153   1.3    oster 			len -= delta;
    154   1.3    oster 		}
    155   1.3    oster 		if (suoffset > s0off) {
    156   1.3    oster 			delta = suoffset - s0off;
    157   1.3    oster 			pbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta);
    158   1.3    oster 			qbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta);
    159   1.3    oster 		}
    160   1.3    oster 		if ((suoffset + len) > (s0len + s0off))
    161   1.3    oster 			len = s0len + s0off - suoffset;
    162   1.3    oster 
    163   1.3    oster 		/* src, dest, len */
    164   1.3    oster 		rf_bxor(buf, pbuf, rf_RaidAddressToByte(raidPtr, len), bp);
    165   1.3    oster 
    166   1.3    oster 		/* dest, src, len, coeff */
    167   1.3    oster 		rf_IncQ((unsigned long *) qbuf, (unsigned long *) buf, rf_RaidAddressToByte(raidPtr, len), coeff);
    168   1.1    oster 	}
    169   1.1    oster }
    170   1.1    oster /*
    171   1.1    oster    Recover data in the case of a double failure. There can be two
    172   1.1    oster    result buffers, one for each chunk of data trying to be recovered.
    173   1.1    oster    The params are pda's that have not been range restricted or otherwise
    174   1.1    oster    politely massaged - this should be done here. The last params are the
    175   1.1    oster    pdas of P and Q, followed by the raidPtr. The list can look like
    176   1.1    oster 
    177   1.1    oster    pda, pda, ... , p pda, q pda, raidptr, asm
    178   1.3    oster 
    179   1.1    oster    or
    180   1.1    oster 
    181   1.1    oster    pda, pda, ... , p_1 pda, p_2 pda, q_1 pda, q_2 pda, raidptr, asm
    182   1.1    oster 
    183   1.1    oster    depending on whether two chunks of recovery data were required.
    184   1.1    oster 
    185   1.1    oster    The second condition only arises if there are two failed buffers
    186   1.1    oster    whose lengths do not add up to a stripe unit.
    187   1.1    oster */
    188   1.1    oster 
    189   1.1    oster 
    190  1.10    perry int
    191   1.3    oster rf_PQDoubleRecoveryFunc(node)
    192   1.3    oster 	RF_DagNode_t *node;
    193   1.1    oster {
    194   1.3    oster 	int     np = node->numParams;
    195   1.3    oster 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    196   1.3    oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    197   1.3    oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    198   1.3    oster 	int     d, i;
    199   1.3    oster 	unsigned coeff;
    200   1.3    oster 	RF_RaidAddr_t sosAddr, suoffset;
    201   1.3    oster 	RF_SectorCount_t len, secPerSU = layoutPtr->sectorsPerStripeUnit;
    202   1.3    oster 	int     two = 0;
    203   1.3    oster 	RF_PhysDiskAddr_t *ppda, *ppda2, *qpda, *qpda2, *pda, npda;
    204   1.3    oster 	char   *buf;
    205   1.3    oster 	int     numDataCol = layoutPtr->numDataCol;
    206   1.3    oster 	RF_Etimer_t timer;
    207   1.3    oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    208   1.3    oster 
    209   1.3    oster 	RF_ETIMER_START(timer);
    210   1.3    oster 
    211   1.3    oster 	if (asmap->failedPDAs[1] &&
    212   1.3    oster 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
    213   1.3    oster 		RF_ASSERT(0);
    214   1.3    oster 		ppda = node->params[np - 6].p;
    215   1.3    oster 		ppda2 = node->params[np - 5].p;
    216   1.3    oster 		qpda = node->params[np - 4].p;
    217   1.3    oster 		qpda2 = node->params[np - 3].p;
    218   1.3    oster 		d = (np - 6);
    219   1.3    oster 		two = 1;
    220   1.3    oster 	} else {
    221   1.3    oster 		ppda = node->params[np - 4].p;
    222   1.3    oster 		qpda = node->params[np - 3].p;
    223   1.3    oster 		d = (np - 4);
    224   1.3    oster 	}
    225   1.3    oster 
    226   1.3    oster 	for (i = 0; i < d; i++) {
    227   1.3    oster 		pda = node->params[i].p;
    228   1.3    oster 		buf = pda->bufPtr;
    229   1.3    oster 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    230   1.3    oster 		len = pda->numSector;
    231   1.3    oster 		coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
    232   1.3    oster 		/* compute the data unit offset within the column */
    233   1.3    oster 		coeff = (coeff % raidPtr->Layout.numDataCol);
    234   1.3    oster 		/* see if pda intersects a recovery pda */
    235   1.3    oster 		applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp);
    236   1.3    oster 		if (two)
    237   1.3    oster 			applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp);
    238   1.1    oster 	}
    239   1.3    oster 
    240   1.3    oster 	/* ok, we got the parity back to the point where we can recover. We
    241   1.3    oster 	 * now need to determine the coeff of the columns that need to be
    242   1.3    oster 	 * recovered. We can also only need to recover a single stripe unit. */
    243   1.3    oster 
    244   1.3    oster 	if (asmap->failedPDAs[1] == NULL) {	/* only a single stripe unit
    245   1.3    oster 						 * to recover. */
    246   1.3    oster 		pda = asmap->failedPDAs[0];
    247   1.3    oster 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    248   1.3    oster 		/* need to determine the column of the other failed disk */
    249   1.3    oster 		coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
    250   1.3    oster 		/* compute the data unit offset within the column */
    251   1.3    oster 		coeff = (coeff % raidPtr->Layout.numDataCol);
    252   1.3    oster 		for (i = 0; i < numDataCol; i++) {
    253   1.3    oster 			npda.raidAddress = sosAddr + (i * secPerSU);
    254   1.3    oster 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    255   1.3    oster 			/* skip over dead disks */
    256   1.3    oster 			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    257   1.3    oster 				if (i != coeff)
    258   1.3    oster 					break;
    259   1.3    oster 		}
    260   1.3    oster 		RF_ASSERT(i < numDataCol);
    261   1.3    oster 		RF_ASSERT(two == 0);
    262   1.3    oster 		/* recover the data. Since we need only want to recover one
    263   1.3    oster 		 * column, we overwrite the parity with the other one. */
    264   1.3    oster 		if (coeff < i)	/* recovering 'a' */
    265   1.3    oster 			rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) pda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i);
    266   1.3    oster 		else		/* recovering 'b' */
    267   1.3    oster 			rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) pda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff);
    268   1.3    oster 	} else
    269   1.3    oster 		RF_PANIC();
    270   1.3    oster 
    271   1.3    oster 	RF_ETIMER_STOP(timer);
    272   1.3    oster 	RF_ETIMER_EVAL(timer);
    273   1.3    oster 	if (tracerec)
    274   1.3    oster 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    275   1.3    oster 	rf_GenericWakeupFunc(node, 0);
    276   1.3    oster 	return (0);
    277   1.1    oster }
    278   1.1    oster 
    279  1.10    perry int
    280   1.3    oster rf_PQWriteDoubleRecoveryFunc(node)
    281   1.3    oster 	RF_DagNode_t *node;
    282   1.1    oster {
    283   1.3    oster 	/* The situation:
    284  1.10    perry 	 *
    285   1.3    oster 	 * We are doing a write that hits only one failed data unit. The other
    286   1.3    oster 	 * failed data unit is not being overwritten, so we need to generate
    287   1.3    oster 	 * it.
    288  1.10    perry 	 *
    289   1.3    oster 	 * For the moment, we assume all the nonfailed data being written is in
    290   1.3    oster 	 * the shadow of the failed data unit. (i.e,, either a single data
    291   1.3    oster 	 * unit write or the entire failed stripe unit is being overwritten. )
    292  1.10    perry 	 *
    293   1.3    oster 	 * Recovery strategy: apply the recovery data to the parity and q. Use P
    294   1.3    oster 	 * & Q to recover the second failed data unit in P. Zero fill Q, then
    295   1.3    oster 	 * apply the recovered data to p. Then apply the data being written to
    296   1.3    oster 	 * the failed drive. Then walk through the surviving drives, applying
    297   1.3    oster 	 * new data when it exists, othewise the recovery data. Quite a mess.
    298  1.10    perry 	 *
    299  1.10    perry 	 *
    300   1.3    oster 	 * The params
    301  1.10    perry 	 *
    302   1.3    oster 	 * read pda0, read pda1, ... read pda (numDataCol-3), write pda0, ... ,
    303   1.3    oster 	 * write pda (numStripeUnitAccess - numDataFailed), failed pda,
    304   1.3    oster 	 * raidPtr, asmap */
    305   1.3    oster 
    306   1.3    oster 	int     np = node->numParams;
    307   1.3    oster 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
    308   1.3    oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
    309   1.3    oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
    310   1.3    oster 	int     i;
    311   1.3    oster 	RF_RaidAddr_t sosAddr;
    312   1.3    oster 	unsigned coeff;
    313   1.3    oster 	RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
    314   1.3    oster 	RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda;
    315   1.3    oster 	int     numDataCol = layoutPtr->numDataCol;
    316   1.3    oster 	RF_Etimer_t timer;
    317   1.3    oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    318   1.3    oster 
    319   1.3    oster 	RF_ASSERT(node->numResults == 2);
    320   1.3    oster 	RF_ASSERT(asmap->failedPDAs[1] == NULL);
    321   1.3    oster 	RF_ETIMER_START(timer);
    322   1.3    oster 	ppda = node->results[0];
    323   1.3    oster 	qpda = node->results[1];
    324   1.3    oster 	/* apply the recovery data */
    325   1.3    oster 	for (i = 0; i < numDataCol - 2; i++)
    326   1.3    oster 		applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp);
    327   1.3    oster 
    328   1.3    oster 	/* determine the other failed data unit */
    329   1.3    oster 	pda = asmap->failedPDAs[0];
    330   1.3    oster 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    331   1.3    oster 	/* need to determine the column of the other failed disk */
    332   1.3    oster 	coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
    333   1.3    oster 	/* compute the data unit offset within the column */
    334   1.3    oster 	coeff = (coeff % raidPtr->Layout.numDataCol);
    335   1.3    oster 	for (i = 0; i < numDataCol; i++) {
    336   1.3    oster 		npda.raidAddress = sosAddr + (i * secPerSU);
    337   1.3    oster 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
    338   1.3    oster 		/* skip over dead disks */
    339   1.3    oster 		if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
    340   1.3    oster 			if (i != coeff)
    341   1.3    oster 				break;
    342   1.3    oster 	}
    343   1.3    oster 	RF_ASSERT(i < numDataCol);
    344   1.3    oster 	/* recover the data. The column we want to recover we write over the
    345   1.3    oster 	 * parity. The column we don't care about we dump in q. */
    346   1.3    oster 	if (coeff < i)		/* recovering 'a' */
    347   1.3    oster 		rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i);
    348   1.3    oster 	else			/* recovering 'b' */
    349   1.3    oster 		rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff);
    350   1.3    oster 
    351   1.3    oster 	/* OK. The valid data is in P. Zero fill Q, then inc it into it. */
    352   1.6  thorpej 	memset(qpda->bufPtr, 0, rf_RaidAddressToByte(raidPtr, qpda->numSector));
    353   1.3    oster 	rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), i);
    354   1.3    oster 
    355   1.3    oster 	/* now apply all the write data to the buffer */
    356   1.3    oster 	/* single stripe unit write case: the failed data is only thing we are
    357   1.3    oster 	 * writing. */
    358   1.3    oster 	RF_ASSERT(asmap->numStripeUnitsAccessed == 1);
    359   1.3    oster 	/* dest, src, len, coeff */
    360   1.3    oster 	rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) asmap->failedPDAs[0]->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff);
    361   1.3    oster 	rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr, rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp);
    362   1.3    oster 
    363   1.3    oster 	/* now apply all the recovery data */
    364   1.3    oster 	for (i = 0; i < numDataCol - 2; i++)
    365   1.3    oster 		applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp);
    366   1.3    oster 
    367   1.3    oster 	RF_ETIMER_STOP(timer);
    368   1.3    oster 	RF_ETIMER_EVAL(timer);
    369   1.3    oster 	if (tracerec)
    370   1.3    oster 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
    371   1.1    oster 
    372   1.3    oster 	rf_GenericWakeupFunc(node, 0);
    373   1.3    oster 	return (0);
    374   1.1    oster }
/*
 * Large-write DAG creation for the double-degraded case.
 * Not implemented: any call ends in RF_PANIC().
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite)
{

	RF_PANIC();
}
    379   1.1    oster /*
    380   1.1    oster    Two lost data unit write case.
    381   1.1    oster 
    382   1.1    oster    There are really two cases here:
    383   1.1    oster 
    384   1.3    oster    (1) The write completely covers the two lost data units.
    385   1.1    oster        In that case, a reconstruct write that doesn't write the
    386   1.1    oster        failed data units will do the correct thing. So in this case,
    387   1.1    oster        the dag looks like
    388   1.1    oster 
    389   1.1    oster             full stripe read of surviving data units (not being overwritten)
    390   1.1    oster 	    write new data (ignoring failed units)   compute P&Q
    391   1.1    oster 	                                             write P&Q
    392   1.1    oster 
    393   1.1    oster 
    394   1.1    oster    (2) The write does not completely cover both failed data units
    395   1.3    oster        (but touches at least one of them). Then we need to do the
    396   1.1    oster        equivalent of a reconstruct read to recover the missing data
    397   1.3    oster        unit from the other stripe.
    398   1.3    oster 
    399   1.1    oster        For any data we are writing that is not in the "shadow"
    400   1.1    oster        of the failed units, we need to do a four cycle update.
    401   1.1    oster        PANIC on this case. for now
    402   1.1    oster 
    403   1.1    oster */
    404   1.1    oster 
    405   1.1    oster RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG)
    406   1.1    oster {
    407   1.3    oster 	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    408   1.3    oster 	RF_SectorCount_t sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
    409   1.3    oster 	int     sum;
    410   1.3    oster 	int     nf = asmap->numDataFailed;
    411   1.3    oster 
    412   1.3    oster 	sum = asmap->failedPDAs[0]->numSector;
    413   1.3    oster 	if (nf == 2)
    414   1.3    oster 		sum += asmap->failedPDAs[1]->numSector;
    415   1.3    oster 
    416   1.3    oster 	if ((nf == 2) && (sum == (2 * sectorsPerSU))) {
    417   1.3    oster 		/* large write case */
    418   1.3    oster 		rf_PQ_DDLargeWrite(raidPtr, asmap, dag_h, bp, flags, allocList);
    419   1.3    oster 		return;
    420   1.3    oster 	}
    421   1.3    oster 	if ((nf == asmap->numStripeUnitsAccessed) || (sum >= sectorsPerSU)) {
    422   1.3    oster 		/* small write case, no user data not in shadow */
    423   1.3    oster 		rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList);
    424   1.3    oster 		return;
    425   1.3    oster 	}
    426   1.3    oster 	RF_PANIC();
    427   1.1    oster }
    428   1.1    oster RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite)
    429   1.1    oster {
    430   1.3    oster 	rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc);
    431   1.1    oster }
    432   1.3    oster #endif				/* (RF_INCLUDE_DECL_PQ > 0) ||
    433   1.3    oster 				 * (RF_INCLUDE_RAID6 > 0) */
    434