Home | History | Annotate | Line # | Download | only in raidframe
rf_dagfuncs.c revision 1.3
      1  1.3  oster /*	$NetBSD: rf_dagfuncs.c,v 1.3 1999/02/05 00:06:08 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Author: Mark Holland, William V. Courtright II
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster 
     29  1.1  oster /*
     30  1.1  oster  * dagfuncs.c -- DAG node execution routines
     31  1.1  oster  *
     32  1.1  oster  * Rules:
     33  1.1  oster  * 1. Every DAG execution function must eventually cause node->status to
     34  1.1  oster  *    get set to "good" or "bad", and "FinishNode" to be called. In the
     35  1.1  oster  *    case of nodes that complete immediately (xor, NullNodeFunc, etc),
     36  1.1  oster  *    the node execution function can do these two things directly. In
     37  1.1  oster  *    the case of nodes that have to wait for some event (a disk read to
     38  1.1  oster  *    complete, a lock to be released, etc) to occur before they can
     39  1.1  oster  *    complete, this is typically achieved by having whatever module
     40  1.1  oster  *    is doing the operation call GenericWakeupFunc upon completion.
     41  1.1  oster  * 2. DAG execution functions should check the status in the DAG header
     42  1.1  oster  *    and NOP out their operations if the status is not "enable". However,
     43  1.1  oster  *    execution functions that release resources must be sure to release
     44  1.1  oster  *    them even when they NOP out the function that would use them.
     45  1.1  oster  *    Functions that acquire resources should go ahead and acquire them
     46  1.1  oster  *    even when they NOP, so that a downstream release node will not have
     47  1.1  oster  *    to check to find out whether or not the acquire was suppressed.
     48  1.1  oster  */
     49  1.1  oster 
     50  1.1  oster #include <sys/ioctl.h>
     51  1.1  oster #include <sys/param.h>
     52  1.1  oster 
     53  1.1  oster #include "rf_archs.h"
     54  1.1  oster #include "rf_raid.h"
     55  1.1  oster #include "rf_dag.h"
     56  1.1  oster #include "rf_layout.h"
     57  1.1  oster #include "rf_etimer.h"
     58  1.1  oster #include "rf_acctrace.h"
     59  1.1  oster #include "rf_diskqueue.h"
     60  1.1  oster #include "rf_dagfuncs.h"
     61  1.1  oster #include "rf_general.h"
     62  1.1  oster #include "rf_engine.h"
     63  1.1  oster #include "rf_dagutils.h"
     64  1.1  oster 
     65  1.1  oster #include "rf_kintf.h"
     66  1.1  oster 
     67  1.1  oster #if RF_INCLUDE_PARITYLOGGING > 0
     68  1.1  oster #include "rf_paritylog.h"
     69  1.3  oster #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
     70  1.1  oster 
     71  1.3  oster int     (*rf_DiskReadFunc) (RF_DagNode_t *);
     72  1.3  oster int     (*rf_DiskWriteFunc) (RF_DagNode_t *);
     73  1.3  oster int     (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
     74  1.3  oster int     (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
     75  1.3  oster int     (*rf_DiskUnlockFunc) (RF_DagNode_t *);
     76  1.3  oster int     (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
     77  1.3  oster int     (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
     78  1.3  oster int     (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
     79  1.3  oster int     (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);
     80  1.1  oster 
     81  1.1  oster /*****************************************************************************************
     82  1.1  oster  * main (only) configuration routine for this module
     83  1.1  oster  ****************************************************************************************/
     84  1.3  oster int
     85  1.3  oster rf_ConfigureDAGFuncs(listp)
     86  1.3  oster 	RF_ShutdownList_t **listp;
     87  1.3  oster {
     88  1.3  oster 	RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
     89  1.3  oster 	rf_DiskReadFunc = rf_DiskReadFuncForThreads;
     90  1.3  oster 	rf_DiskReadUndoFunc = rf_DiskUndoFunc;
     91  1.3  oster 	rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
     92  1.3  oster 	rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
     93  1.3  oster 	rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
     94  1.3  oster 	rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
     95  1.3  oster 	rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
     96  1.3  oster 	rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
     97  1.3  oster 	rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
     98  1.3  oster 	return (0);
     99  1.1  oster }
    100  1.1  oster 
    101  1.1  oster 
    102  1.1  oster 
    103  1.1  oster /*****************************************************************************************
    104  1.1  oster  * the execution function associated with a terminate node
    105  1.1  oster  ****************************************************************************************/
    106  1.3  oster int
    107  1.3  oster rf_TerminateFunc(node)
    108  1.3  oster 	RF_DagNode_t *node;
    109  1.1  oster {
    110  1.3  oster 	RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
    111  1.3  oster 	node->status = rf_good;
    112  1.3  oster 	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
    113  1.1  oster }
    114  1.1  oster 
    115  1.3  oster int
    116  1.3  oster rf_TerminateUndoFunc(node)
    117  1.3  oster 	RF_DagNode_t *node;
    118  1.1  oster {
    119  1.3  oster 	return (0);
    120  1.1  oster }
    121  1.1  oster 
    122  1.1  oster 
    123  1.1  oster /*****************************************************************************************
    124  1.1  oster  * execution functions associated with a mirror node
    125  1.1  oster  *
    126  1.1  oster  * parameters:
    127  1.1  oster  *
    128  1.1  oster  * 0 - physical disk address of data
    129  1.1  oster  * 1 - buffer for holding read data
    130  1.1  oster  * 2 - parity stripe ID
    131  1.1  oster  * 3 - flags
    132  1.1  oster  * 4 - physical disk address of mirror (parity)
    133  1.1  oster  *
    134  1.1  oster  ****************************************************************************************/
    135  1.1  oster 
    136  1.3  oster int
    137  1.3  oster rf_DiskReadMirrorIdleFunc(node)
    138  1.3  oster 	RF_DagNode_t *node;
    139  1.1  oster {
    140  1.3  oster 	/* select the mirror copy with the shortest queue and fill in node
    141  1.3  oster 	 * parameters with physical disk address */
    142  1.1  oster 
    143  1.3  oster 	rf_SelectMirrorDiskIdle(node);
    144  1.3  oster 	return (rf_DiskReadFunc(node));
    145  1.1  oster }
    146  1.1  oster 
    147  1.3  oster int
    148  1.3  oster rf_DiskReadMirrorPartitionFunc(node)
    149  1.3  oster 	RF_DagNode_t *node;
    150  1.1  oster {
    151  1.3  oster 	/* select the mirror copy with the shortest queue and fill in node
    152  1.3  oster 	 * parameters with physical disk address */
    153  1.1  oster 
    154  1.3  oster 	rf_SelectMirrorDiskPartition(node);
    155  1.3  oster 	return (rf_DiskReadFunc(node));
    156  1.1  oster }
    157  1.1  oster 
    158  1.3  oster int
    159  1.3  oster rf_DiskReadMirrorUndoFunc(node)
    160  1.3  oster 	RF_DagNode_t *node;
    161  1.1  oster {
    162  1.3  oster 	return (0);
    163  1.1  oster }
    164  1.1  oster 
    165  1.1  oster 
    166  1.1  oster 
    167  1.1  oster #if RF_INCLUDE_PARITYLOGGING > 0
    168  1.1  oster /*****************************************************************************************
    169  1.1  oster  * the execution function associated with a parity log update node
    170  1.1  oster  ****************************************************************************************/
    171  1.3  oster int
    172  1.3  oster rf_ParityLogUpdateFunc(node)
    173  1.3  oster 	RF_DagNode_t *node;
    174  1.3  oster {
    175  1.3  oster 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    176  1.3  oster 	caddr_t buf = (caddr_t) node->params[1].p;
    177  1.3  oster 	RF_ParityLogData_t *logData;
    178  1.3  oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    179  1.3  oster 	RF_Etimer_t timer;
    180  1.3  oster 
    181  1.3  oster 	if (node->dagHdr->status == rf_enable) {
    182  1.3  oster 		RF_ETIMER_START(timer);
    183  1.3  oster 		logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
    184  1.3  oster 		    (RF_Raid_t *) (node->dagHdr->raidPtr),
    185  1.3  oster 		    node->wakeFunc, (void *) node,
    186  1.3  oster 		    node->dagHdr->tracerec, timer);
    187  1.3  oster 		if (logData)
    188  1.3  oster 			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
    189  1.3  oster 		else {
    190  1.3  oster 			RF_ETIMER_STOP(timer);
    191  1.3  oster 			RF_ETIMER_EVAL(timer);
    192  1.3  oster 			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
    193  1.3  oster 			(node->wakeFunc) (node, ENOMEM);
    194  1.3  oster 		}
    195  1.1  oster 	}
    196  1.3  oster 	return (0);
    197  1.1  oster }
    198  1.1  oster 
    199  1.1  oster 
    200  1.1  oster /*****************************************************************************************
    201  1.1  oster  * the execution function associated with a parity log overwrite node
    202  1.1  oster  ****************************************************************************************/
    203  1.3  oster int
    204  1.3  oster rf_ParityLogOverwriteFunc(node)
    205  1.3  oster 	RF_DagNode_t *node;
    206  1.3  oster {
    207  1.3  oster 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    208  1.3  oster 	caddr_t buf = (caddr_t) node->params[1].p;
    209  1.3  oster 	RF_ParityLogData_t *logData;
    210  1.3  oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    211  1.3  oster 	RF_Etimer_t timer;
    212  1.3  oster 
    213  1.3  oster 	if (node->dagHdr->status == rf_enable) {
    214  1.3  oster 		RF_ETIMER_START(timer);
    215  1.3  oster 		logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr),
    216  1.3  oster 		    node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer);
    217  1.3  oster 		if (logData)
    218  1.3  oster 			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
    219  1.3  oster 		else {
    220  1.3  oster 			RF_ETIMER_STOP(timer);
    221  1.3  oster 			RF_ETIMER_EVAL(timer);
    222  1.3  oster 			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
    223  1.3  oster 			(node->wakeFunc) (node, ENOMEM);
    224  1.3  oster 		}
    225  1.1  oster 	}
    226  1.3  oster 	return (0);
    227  1.1  oster }
    228  1.3  oster #else				/* RF_INCLUDE_PARITYLOGGING > 0 */
    229  1.1  oster 
    230  1.3  oster int
    231  1.3  oster rf_ParityLogUpdateFunc(node)
    232  1.3  oster 	RF_DagNode_t *node;
    233  1.1  oster {
    234  1.3  oster 	return (0);
    235  1.1  oster }
    236  1.3  oster int
    237  1.3  oster rf_ParityLogOverwriteFunc(node)
    238  1.3  oster 	RF_DagNode_t *node;
    239  1.1  oster {
    240  1.3  oster 	return (0);
    241  1.1  oster }
    242  1.3  oster #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
    243  1.1  oster 
    244  1.3  oster int
    245  1.3  oster rf_ParityLogUpdateUndoFunc(node)
    246  1.3  oster 	RF_DagNode_t *node;
    247  1.1  oster {
    248  1.3  oster 	return (0);
    249  1.1  oster }
    250  1.1  oster 
    251  1.3  oster int
    252  1.3  oster rf_ParityLogOverwriteUndoFunc(node)
    253  1.3  oster 	RF_DagNode_t *node;
    254  1.1  oster {
    255  1.3  oster 	return (0);
    256  1.1  oster }
    257  1.1  oster /*****************************************************************************************
    258  1.1  oster  * the execution function associated with a NOP node
    259  1.1  oster  ****************************************************************************************/
    260  1.3  oster int
    261  1.3  oster rf_NullNodeFunc(node)
    262  1.3  oster 	RF_DagNode_t *node;
    263  1.1  oster {
    264  1.3  oster 	node->status = rf_good;
    265  1.3  oster 	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
    266  1.1  oster }
    267  1.1  oster 
    268  1.3  oster int
    269  1.3  oster rf_NullNodeUndoFunc(node)
    270  1.3  oster 	RF_DagNode_t *node;
    271  1.1  oster {
    272  1.3  oster 	node->status = rf_undone;
    273  1.3  oster 	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
    274  1.1  oster }
    275  1.1  oster 
    276  1.1  oster 
    277  1.1  oster /*****************************************************************************************
    278  1.1  oster  * the execution function associated with a disk-read node
    279  1.1  oster  ****************************************************************************************/
    280  1.3  oster int
    281  1.3  oster rf_DiskReadFuncForThreads(node)
    282  1.3  oster 	RF_DagNode_t *node;
    283  1.3  oster {
    284  1.3  oster 	RF_DiskQueueData_t *req;
    285  1.3  oster 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    286  1.3  oster 	caddr_t buf = (caddr_t) node->params[1].p;
    287  1.3  oster 	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
    288  1.3  oster 	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
    289  1.3  oster 	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
    290  1.3  oster 	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
    291  1.3  oster 	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
    292  1.3  oster 	RF_DiskQueueDataFlags_t flags = 0;
    293  1.3  oster 	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
    294  1.3  oster 	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
    295  1.3  oster 	void   *b_proc = NULL;
    296  1.1  oster #if RF_BACKWARD > 0
    297  1.3  oster 	caddr_t undoBuf;
    298  1.1  oster #endif
    299  1.1  oster 
    300  1.3  oster 	if (node->dagHdr->bp)
    301  1.3  oster 		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
    302  1.1  oster 
    303  1.3  oster 	RF_ASSERT(!(lock && unlock));
    304  1.3  oster 	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
    305  1.3  oster 	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
    306  1.1  oster #if RF_BACKWARD > 0
    307  1.3  oster 	/* allocate and zero the undo buffer. this is equivalent to copying
    308  1.3  oster 	 * the original buffer's contents to the undo buffer prior to
    309  1.3  oster 	 * performing the disk read. XXX hardcoded 512 bytes per sector! */
    310  1.3  oster 	if (node->dagHdr->allocList == NULL)
    311  1.3  oster 		rf_MakeAllocList(node->dagHdr->allocList);
    312  1.3  oster 	RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList);
    313  1.3  oster #endif				/* RF_BACKWARD > 0 */
    314  1.3  oster 	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
    315  1.3  oster 	    buf, parityStripeID, which_ru,
    316  1.3  oster 	    (int (*) (void *, int)) node->wakeFunc,
    317  1.3  oster 	    node, NULL, node->dagHdr->tracerec,
    318  1.3  oster 	    (void *) (node->dagHdr->raidPtr), flags, b_proc);
    319  1.3  oster 	if (!req) {
    320  1.3  oster 		(node->wakeFunc) (node, ENOMEM);
    321  1.3  oster 	} else {
    322  1.3  oster 		node->dagFuncData = (void *) req;
    323  1.3  oster 		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
    324  1.3  oster 	}
    325  1.3  oster 	return (0);
    326  1.1  oster }
    327  1.1  oster 
    328  1.1  oster 
    329  1.1  oster /*****************************************************************************************
    330  1.1  oster  * the execution function associated with a disk-write node
    331  1.1  oster  ****************************************************************************************/
    332  1.3  oster int
    333  1.3  oster rf_DiskWriteFuncForThreads(node)
    334  1.3  oster 	RF_DagNode_t *node;
    335  1.3  oster {
    336  1.3  oster 	RF_DiskQueueData_t *req;
    337  1.3  oster 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    338  1.3  oster 	caddr_t buf = (caddr_t) node->params[1].p;
    339  1.3  oster 	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
    340  1.3  oster 	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
    341  1.3  oster 	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
    342  1.3  oster 	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
    343  1.3  oster 	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
    344  1.3  oster 	RF_DiskQueueDataFlags_t flags = 0;
    345  1.3  oster 	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
    346  1.3  oster 	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
    347  1.3  oster 	void   *b_proc = NULL;
    348  1.1  oster #if RF_BACKWARD > 0
    349  1.3  oster 	caddr_t undoBuf;
    350  1.1  oster #endif
    351  1.1  oster 
    352  1.3  oster 	if (node->dagHdr->bp)
    353  1.3  oster 		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
    354  1.1  oster 
    355  1.1  oster #if RF_BACKWARD > 0
    356  1.3  oster 	/* This area is used only for backward error recovery experiments
    357  1.3  oster 	 * First, schedule allocate a buffer and schedule a pre-read of the
    358  1.3  oster 	 * disk After the pre-read, proceed with the normal disk write */
    359  1.3  oster 	if (node->status == rf_bwd2) {
    360  1.3  oster 		/* just finished undo logging, now perform real function */
    361  1.3  oster 		node->status = rf_fired;
    362  1.3  oster 		RF_ASSERT(!(lock && unlock));
    363  1.3  oster 		flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
    364  1.3  oster 		flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
    365  1.3  oster 		req = rf_CreateDiskQueueData(iotype,
    366  1.3  oster 		    pda->startSector, pda->numSector, buf, parityStripeID, which_ru,
    367  1.3  oster 		    node->wakeFunc, (void *) node, NULL, node->dagHdr->tracerec,
    368  1.3  oster 		    (void *) (node->dagHdr->raidPtr), flags, b_proc);
    369  1.3  oster 
    370  1.3  oster 		if (!req) {
    371  1.3  oster 			(node->wakeFunc) (node, ENOMEM);
    372  1.3  oster 		} else {
    373  1.3  oster 			node->dagFuncData = (void *) req;
    374  1.3  oster 			rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
    375  1.3  oster 		}
    376  1.3  oster 	} else {
    377  1.3  oster 		/* node status should be rf_fired */
    378  1.3  oster 		/* schedule a disk pre-read */
    379  1.3  oster 		node->status = rf_bwd1;
    380  1.3  oster 		RF_ASSERT(!(lock && unlock));
    381  1.3  oster 		flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
    382  1.3  oster 		flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
    383  1.3  oster 		if (node->dagHdr->allocList == NULL)
    384  1.3  oster 			rf_MakeAllocList(node->dagHdr->allocList);
    385  1.3  oster 		RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList);
    386  1.3  oster 		req = rf_CreateDiskQueueData(RF_IO_TYPE_READ,
    387  1.3  oster 		    pda->startSector, pda->numSector, undoBuf, parityStripeID, which_ru,
    388  1.3  oster 		    node->wakeFunc, (void *) node, NULL, node->dagHdr->tracerec,
    389  1.3  oster 		    (void *) (node->dagHdr->raidPtr), flags, b_proc);
    390  1.3  oster 
    391  1.3  oster 		if (!req) {
    392  1.3  oster 			(node->wakeFunc) (node, ENOMEM);
    393  1.3  oster 		} else {
    394  1.3  oster 			node->dagFuncData = (void *) req;
    395  1.3  oster 			rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
    396  1.3  oster 		}
    397  1.3  oster 	}
    398  1.3  oster 	return (0);
    399  1.3  oster #endif				/* RF_BACKWARD > 0 */
    400  1.1  oster 
    401  1.3  oster 	/* normal processing (rollaway or forward recovery) begins here */
    402  1.3  oster 	RF_ASSERT(!(lock && unlock));
    403  1.3  oster 	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
    404  1.3  oster 	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
    405  1.3  oster 	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
    406  1.3  oster 	    buf, parityStripeID, which_ru,
    407  1.3  oster 	    (int (*) (void *, int)) node->wakeFunc,
    408  1.3  oster 	    (void *) node, NULL,
    409  1.3  oster 	    node->dagHdr->tracerec,
    410  1.3  oster 	    (void *) (node->dagHdr->raidPtr),
    411  1.3  oster 	    flags, b_proc);
    412  1.3  oster 
    413  1.3  oster 	if (!req) {
    414  1.3  oster 		(node->wakeFunc) (node, ENOMEM);
    415  1.3  oster 	} else {
    416  1.3  oster 		node->dagFuncData = (void *) req;
    417  1.3  oster 		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
    418  1.3  oster 	}
    419  1.3  oster 
    420  1.3  oster 	return (0);
    421  1.1  oster }
    422  1.1  oster /*****************************************************************************************
    423  1.1  oster  * the undo function for disk nodes
    424  1.1  oster  * Note:  this is not a proper undo of a write node, only locks are released.
    425  1.1  oster  *        old data is not restored to disk!
    426  1.1  oster  ****************************************************************************************/
    427  1.3  oster int
    428  1.3  oster rf_DiskUndoFunc(node)
    429  1.3  oster 	RF_DagNode_t *node;
    430  1.3  oster {
    431  1.3  oster 	RF_DiskQueueData_t *req;
    432  1.3  oster 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    433  1.3  oster 	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
    434  1.3  oster 
    435  1.3  oster 	req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
    436  1.3  oster 	    0L, 0, NULL, 0L, 0,
    437  1.3  oster 	    (int (*) (void *, int)) node->wakeFunc,
    438  1.3  oster 	    (void *) node,
    439  1.3  oster 	    NULL, node->dagHdr->tracerec,
    440  1.3  oster 	    (void *) (node->dagHdr->raidPtr),
    441  1.3  oster 	    RF_UNLOCK_DISK_QUEUE, NULL);
    442  1.3  oster 	if (!req)
    443  1.3  oster 		(node->wakeFunc) (node, ENOMEM);
    444  1.3  oster 	else {
    445  1.3  oster 		node->dagFuncData = (void *) req;
    446  1.3  oster 		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY);
    447  1.3  oster 	}
    448  1.1  oster 
    449  1.3  oster 	return (0);
    450  1.1  oster }
    451  1.1  oster /*****************************************************************************************
    452  1.1  oster  * the execution function associated with an "unlock disk queue" node
    453  1.1  oster  ****************************************************************************************/
    454  1.3  oster int
    455  1.3  oster rf_DiskUnlockFuncForThreads(node)
    456  1.3  oster 	RF_DagNode_t *node;
    457  1.3  oster {
    458  1.3  oster 	RF_DiskQueueData_t *req;
    459  1.3  oster 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    460  1.3  oster 	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
    461  1.3  oster 
    462  1.3  oster 	req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
    463  1.3  oster 	    0L, 0, NULL, 0L, 0,
    464  1.3  oster 	    (int (*) (void *, int)) node->wakeFunc,
    465  1.3  oster 	    (void *) node,
    466  1.3  oster 	    NULL, node->dagHdr->tracerec,
    467  1.3  oster 	    (void *) (node->dagHdr->raidPtr),
    468  1.3  oster 	    RF_UNLOCK_DISK_QUEUE, NULL);
    469  1.3  oster 	if (!req)
    470  1.3  oster 		(node->wakeFunc) (node, ENOMEM);
    471  1.3  oster 	else {
    472  1.3  oster 		node->dagFuncData = (void *) req;
    473  1.3  oster 		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY);
    474  1.3  oster 	}
    475  1.1  oster 
    476  1.3  oster 	return (0);
    477  1.1  oster }
    478  1.1  oster /*****************************************************************************************
    479  1.1  oster  * Callback routine for DiskRead and DiskWrite nodes.  When the disk op completes,
    480  1.1  oster  * the routine is called to set the node status and inform the execution engine that
    481  1.1  oster  * the node has fired.
    482  1.1  oster  ****************************************************************************************/
    483  1.3  oster int
    484  1.3  oster rf_GenericWakeupFunc(node, status)
    485  1.3  oster 	RF_DagNode_t *node;
    486  1.3  oster 	int     status;
    487  1.3  oster {
    488  1.3  oster 	switch (node->status) {
    489  1.3  oster 	case rf_bwd1:
    490  1.3  oster 		node->status = rf_bwd2;
    491  1.3  oster 		if (node->dagFuncData)
    492  1.3  oster 			rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
    493  1.3  oster 		return (rf_DiskWriteFuncForThreads(node));
    494  1.3  oster 		break;
    495  1.3  oster 	case rf_fired:
    496  1.3  oster 		if (status)
    497  1.3  oster 			node->status = rf_bad;
    498  1.3  oster 		else
    499  1.3  oster 			node->status = rf_good;
    500  1.3  oster 		break;
    501  1.3  oster 	case rf_recover:
    502  1.3  oster 		/* probably should never reach this case */
    503  1.3  oster 		if (status)
    504  1.3  oster 			node->status = rf_panic;
    505  1.3  oster 		else
    506  1.3  oster 			node->status = rf_undone;
    507  1.3  oster 		break;
    508  1.3  oster 	default:
    509  1.3  oster 		RF_PANIC();
    510  1.3  oster 		break;
    511  1.3  oster 	}
    512  1.3  oster 	if (node->dagFuncData)
    513  1.3  oster 		rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
    514  1.3  oster 	return (rf_FinishNode(node, RF_INTR_CONTEXT));
    515  1.1  oster }
    516  1.1  oster 
    517  1.1  oster 
    518  1.1  oster /*****************************************************************************************
    519  1.1  oster  * there are three distinct types of xor nodes
    520  1.1  oster  * A "regular xor" is used in the fault-free case where the access spans a complete
    521  1.1  oster  * stripe unit.  It assumes that the result buffer is one full stripe unit in size,
    522  1.1  oster  * and uses the stripe-unit-offset values that it computes from the PDAs to determine
    523  1.1  oster  * where within the stripe unit to XOR each argument buffer.
    524  1.1  oster  *
    525  1.1  oster  * A "simple xor" is used in the fault-free case where the access touches only a portion
    526  1.1  oster  * of one (or two, in some cases) stripe unit(s).  It assumes that all the argument
    527  1.1  oster  * buffers are of the same size and have the same stripe unit offset.
    528  1.1  oster  *
    529  1.1  oster  * A "recovery xor" is used in the degraded-mode case.  It's similar to the regular
    530  1.1  oster  * xor function except that it takes the failed PDA as an additional parameter, and
    531  1.1  oster  * uses it to determine what portions of the argument buffers need to be xor'd into
    532  1.1  oster  * the result buffer, and where in the result buffer they should go.
    533  1.1  oster  ****************************************************************************************/
    534  1.1  oster 
    535  1.1  oster /* xor the params together and store the result in the result field.
    536  1.1  oster  * assume the result field points to a buffer that is the size of one SU,
    537  1.1  oster  * and use the pda params to determine where within the buffer to XOR
    538  1.1  oster  * the input buffers.
    539  1.1  oster  */
    540  1.3  oster int
    541  1.3  oster rf_RegularXorFunc(node)
    542  1.3  oster 	RF_DagNode_t *node;
    543  1.3  oster {
    544  1.3  oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    545  1.3  oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    546  1.3  oster 	RF_Etimer_t timer;
    547  1.3  oster 	int     i, retcode;
    548  1.1  oster #if RF_BACKWARD > 0
    549  1.3  oster 	RF_PhysDiskAddr_t *pda;
    550  1.3  oster 	caddr_t undoBuf;
    551  1.1  oster #endif
    552  1.1  oster 
    553  1.3  oster 	retcode = 0;
    554  1.3  oster 	if (node->dagHdr->status == rf_enable) {
    555  1.3  oster 		/* don't do the XOR if the input is the same as the output */
    556  1.3  oster 		RF_ETIMER_START(timer);
    557  1.3  oster 		for (i = 0; i < node->numParams - 1; i += 2)
    558  1.3  oster 			if (node->params[i + 1].p != node->results[0]) {
    559  1.1  oster #if RF_BACKWARD > 0
    560  1.3  oster 				/* This section mimics undo logging for
    561  1.3  oster 				 * backward error recovery experiments b
    562  1.3  oster 				 * allocating and initializing a buffer XXX
    563  1.3  oster 				 * 512 byte sector size is hard coded! */
    564  1.3  oster 				pda = node->params[i].p;
    565  1.3  oster 				if (node->dagHdr->allocList == NULL)
    566  1.3  oster 					rf_MakeAllocList(node->dagHdr->allocList);
    567  1.3  oster 				RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList);
    568  1.3  oster #endif				/* RF_BACKWARD > 0 */
    569  1.3  oster 				retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
    570  1.3  oster 				    (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp);
    571  1.3  oster 			}
    572  1.3  oster 		RF_ETIMER_STOP(timer);
    573  1.3  oster 		RF_ETIMER_EVAL(timer);
    574  1.3  oster 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    575  1.3  oster 	}
    576  1.3  oster 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    577  1.3  oster 							 * explicitly since no
    578  1.3  oster 							 * I/O in this node */
    579  1.1  oster }
    580  1.1  oster /* xor the inputs into the result buffer, ignoring placement issues */
    581  1.3  oster int
    582  1.3  oster rf_SimpleXorFunc(node)
    583  1.3  oster 	RF_DagNode_t *node;
    584  1.3  oster {
    585  1.3  oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    586  1.3  oster 	int     i, retcode = 0;
    587  1.3  oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    588  1.3  oster 	RF_Etimer_t timer;
    589  1.1  oster #if RF_BACKWARD > 0
    590  1.3  oster 	RF_PhysDiskAddr_t *pda;
    591  1.3  oster 	caddr_t undoBuf;
    592  1.1  oster #endif
    593  1.1  oster 
    594  1.3  oster 	if (node->dagHdr->status == rf_enable) {
    595  1.3  oster 		RF_ETIMER_START(timer);
    596  1.3  oster 		/* don't do the XOR if the input is the same as the output */
    597  1.3  oster 		for (i = 0; i < node->numParams - 1; i += 2)
    598  1.3  oster 			if (node->params[i + 1].p != node->results[0]) {
    599  1.1  oster #if RF_BACKWARD > 0
    600  1.3  oster 				/* This section mimics undo logging for
    601  1.3  oster 				 * backward error recovery experiments b
    602  1.3  oster 				 * allocating and initializing a buffer XXX
    603  1.3  oster 				 * 512 byte sector size is hard coded! */
    604  1.3  oster 				pda = node->params[i].p;
    605  1.3  oster 				if (node->dagHdr->allocList == NULL)
    606  1.3  oster 					rf_MakeAllocList(node->dagHdr->allocList);
    607  1.3  oster 				RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList);
    608  1.3  oster #endif				/* RF_BACKWARD > 0 */
    609  1.3  oster 				retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0],
    610  1.3  oster 				    rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector),
    611  1.3  oster 				    (struct buf *) node->dagHdr->bp);
    612  1.3  oster 			}
    613  1.3  oster 		RF_ETIMER_STOP(timer);
    614  1.3  oster 		RF_ETIMER_EVAL(timer);
    615  1.3  oster 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    616  1.3  oster 	}
    617  1.3  oster 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
    618  1.3  oster 							 * explicitly since no
    619  1.3  oster 							 * I/O in this node */
    620  1.1  oster }
    621  1.1  oster /* this xor is used by the degraded-mode dag functions to recover lost data.
    622  1.1  oster  * the second-to-last parameter is the PDA for the failed portion of the access.
    623  1.1  oster  * the code here looks at this PDA and assumes that the xor target buffer is
    624  1.1  oster  * equal in size to the number of sectors in the failed PDA.  It then uses
    625  1.1  oster  * the other PDAs in the parameter list to determine where within the target
    626  1.1  oster  * buffer the corresponding data should be xored.
    627  1.1  oster  */
    628  1.3  oster int
    629  1.3  oster rf_RecoveryXorFunc(node)
    630  1.3  oster 	RF_DagNode_t *node;
    631  1.3  oster {
    632  1.3  oster 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    633  1.3  oster 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
    634  1.3  oster 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
    635  1.3  oster 	int     i, retcode = 0;
    636  1.3  oster 	RF_PhysDiskAddr_t *pda;
    637  1.3  oster 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
    638  1.3  oster 	char   *srcbuf, *destbuf;
    639  1.3  oster 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
    640  1.3  oster 	RF_Etimer_t timer;
    641  1.1  oster #if RF_BACKWARD > 0
    642  1.3  oster 	caddr_t undoBuf;
    643  1.1  oster #endif
    644  1.1  oster 
    645  1.3  oster 	if (node->dagHdr->status == rf_enable) {
    646  1.3  oster 		RF_ETIMER_START(timer);
    647  1.3  oster 		for (i = 0; i < node->numParams - 2; i += 2)
    648  1.3  oster 			if (node->params[i + 1].p != node->results[0]) {
    649  1.3  oster 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
    650  1.1  oster #if RF_BACKWARD > 0
    651  1.3  oster 				/* This section mimics undo logging for
    652  1.3  oster 				 * backward error recovery experiments b
    653  1.3  oster 				 * allocating and initializing a buffer XXX
    654  1.3  oster 				 * 512 byte sector size is hard coded! */
    655  1.3  oster 				if (node->dagHdr->allocList == NULL)
    656  1.3  oster 					rf_MakeAllocList(node->dagHdr->allocList);
    657  1.3  oster 				RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList);
    658  1.3  oster #endif				/* RF_BACKWARD > 0 */
    659  1.3  oster 				srcbuf = (char *) node->params[i + 1].p;
    660  1.3  oster 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
    661  1.3  oster 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
    662  1.3  oster 				retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp);
    663  1.3  oster 			}
    664  1.3  oster 		RF_ETIMER_STOP(timer);
    665  1.3  oster 		RF_ETIMER_EVAL(timer);
    666  1.3  oster 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
    667  1.3  oster 	}
    668  1.3  oster 	return (rf_GenericWakeupFunc(node, retcode));
    669  1.1  oster }
    670  1.1  oster /*****************************************************************************************
    671  1.1  oster  * The next three functions are utilities used by the above xor-execution functions.
    672  1.1  oster  ****************************************************************************************/
    673  1.1  oster 
    674  1.1  oster 
    675  1.1  oster /*
    676  1.1  oster  * this is just a glorified buffer xor.  targbuf points to a buffer that is one full stripe unit
    677  1.1  oster  * in size.  srcbuf points to a buffer that may be less than 1 SU, but never more.  When the
    678  1.1  oster  * access described by pda is one SU in size (which by implication means it's SU-aligned),
    679  1.1  oster  * all that happens is (targbuf) <- (srcbuf ^ targbuf).  When the access is less than one
    680  1.1  oster  * SU in size the XOR occurs on only the portion of targbuf identified in the pda.
    681  1.1  oster  */
    682  1.1  oster 
    683  1.3  oster int
    684  1.3  oster rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp)
    685  1.3  oster 	RF_Raid_t *raidPtr;
    686  1.3  oster 	RF_PhysDiskAddr_t *pda;
    687  1.3  oster 	char   *srcbuf;
    688  1.3  oster 	char   *targbuf;
    689  1.3  oster 	void   *bp;
    690  1.3  oster {
    691  1.3  oster 	char   *targptr;
    692  1.3  oster 	int     sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    693  1.3  oster 	int     SUOffset = pda->startSector % sectPerSU;
    694  1.3  oster 	int     length, retcode = 0;
    695  1.3  oster 
    696  1.3  oster 	RF_ASSERT(pda->numSector <= sectPerSU);
    697  1.3  oster 
    698  1.3  oster 	targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
    699  1.3  oster 	length = rf_RaidAddressToByte(raidPtr, pda->numSector);
    700  1.3  oster 	retcode = rf_bxor(srcbuf, targptr, length, bp);
    701  1.3  oster 	return (retcode);
    702  1.1  oster }
    703  1.1  oster /* it really should be the case that the buffer pointers (returned by malloc)
    704  1.1  oster  * are aligned to the natural word size of the machine, so this is the only
    705  1.1  oster  * case we optimize for.  The length should always be a multiple of the sector
    706  1.1  oster  * size, so there should be no problem with leftover bytes at the end.
    707  1.1  oster  */
    708  1.3  oster int
    709  1.3  oster rf_bxor(src, dest, len, bp)
    710  1.3  oster 	char   *src;
    711  1.3  oster 	char   *dest;
    712  1.3  oster 	int     len;
    713  1.3  oster 	void   *bp;
    714  1.3  oster {
    715  1.3  oster 	unsigned mask = sizeof(long) - 1, retcode = 0;
    716  1.3  oster 
    717  1.3  oster 	if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) {
    718  1.3  oster 		retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp);
    719  1.3  oster 	} else {
    720  1.3  oster 		RF_ASSERT(0);
    721  1.3  oster 	}
    722  1.3  oster 	return (retcode);
    723  1.1  oster }
    724  1.1  oster /* map a user buffer into kernel space, if necessary */
    725  1.1  oster #define REMAP_VA(_bp,x,y) (y) = (x)
    726  1.1  oster 
    727  1.1  oster /* When XORing in kernel mode, we need to map each user page to kernel space before we can access it.
    728  1.1  oster  * We don't want to assume anything about which input buffers are in kernel/user
    729  1.1  oster  * space, nor about their alignment, so in each loop we compute the maximum number
    730  1.1  oster  * of bytes that we can xor without crossing any page boundaries, and do only this many
    731  1.1  oster  * bytes before the next remap.
    732  1.1  oster  */
    733  1.3  oster int
    734  1.3  oster rf_longword_bxor(src, dest, len, bp)
    735  1.3  oster 	register unsigned long *src;
    736  1.3  oster 	register unsigned long *dest;
    737  1.3  oster 	int     len;		/* longwords */
    738  1.3  oster 	void   *bp;
    739  1.3  oster {
    740  1.3  oster 	register unsigned long *end = src + len;
    741  1.3  oster 	register unsigned long d0, d1, d2, d3, s0, s1, s2, s3;	/* temps */
    742  1.3  oster 	register unsigned long *pg_src, *pg_dest;	/* per-page source/dest
    743  1.3  oster 							 * pointers */
    744  1.3  oster 	int     longs_this_time;/* # longwords to xor in the current iteration */
    745  1.3  oster 
    746  1.3  oster 	REMAP_VA(bp, src, pg_src);
    747  1.3  oster 	REMAP_VA(bp, dest, pg_dest);
    748  1.3  oster 	if (!pg_src || !pg_dest)
    749  1.3  oster 		return (EFAULT);
    750  1.3  oster 
    751  1.3  oster 	while (len >= 4) {
    752  1.3  oster 		longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT);	/* note len in longwords */
    753  1.3  oster 		src += longs_this_time;
    754  1.3  oster 		dest += longs_this_time;
    755  1.3  oster 		len -= longs_this_time;
    756  1.3  oster 		while (longs_this_time >= 4) {
    757  1.3  oster 			d0 = pg_dest[0];
    758  1.3  oster 			d1 = pg_dest[1];
    759  1.3  oster 			d2 = pg_dest[2];
    760  1.3  oster 			d3 = pg_dest[3];
    761  1.3  oster 			s0 = pg_src[0];
    762  1.3  oster 			s1 = pg_src[1];
    763  1.3  oster 			s2 = pg_src[2];
    764  1.3  oster 			s3 = pg_src[3];
    765  1.3  oster 			pg_dest[0] = d0 ^ s0;
    766  1.3  oster 			pg_dest[1] = d1 ^ s1;
    767  1.3  oster 			pg_dest[2] = d2 ^ s2;
    768  1.3  oster 			pg_dest[3] = d3 ^ s3;
    769  1.3  oster 			pg_src += 4;
    770  1.3  oster 			pg_dest += 4;
    771  1.3  oster 			longs_this_time -= 4;
    772  1.3  oster 		}
    773  1.3  oster 		while (longs_this_time > 0) {	/* cannot cross any page
    774  1.3  oster 						 * boundaries here */
    775  1.3  oster 			*pg_dest++ ^= *pg_src++;
    776  1.3  oster 			longs_this_time--;
    777  1.3  oster 		}
    778  1.3  oster 
    779  1.3  oster 		/* either we're done, or we've reached a page boundary on one
    780  1.3  oster 		 * (or possibly both) of the pointers */
    781  1.3  oster 		if (len) {
    782  1.3  oster 			if (RF_PAGE_ALIGNED(src))
    783  1.3  oster 				REMAP_VA(bp, src, pg_src);
    784  1.3  oster 			if (RF_PAGE_ALIGNED(dest))
    785  1.3  oster 				REMAP_VA(bp, dest, pg_dest);
    786  1.3  oster 			if (!pg_src || !pg_dest)
    787  1.3  oster 				return (EFAULT);
    788  1.3  oster 		}
    789  1.3  oster 	}
    790  1.3  oster 	while (src < end) {
    791  1.3  oster 		*pg_dest++ ^= *pg_src++;
    792  1.3  oster 		src++;
    793  1.3  oster 		dest++;
    794  1.3  oster 		len--;
    795  1.3  oster 		if (RF_PAGE_ALIGNED(src))
    796  1.3  oster 			REMAP_VA(bp, src, pg_src);
    797  1.3  oster 		if (RF_PAGE_ALIGNED(dest))
    798  1.3  oster 			REMAP_VA(bp, dest, pg_dest);
    799  1.3  oster 	}
    800  1.3  oster 	RF_ASSERT(len == 0);
    801  1.3  oster 	return (0);
    802  1.1  oster }
    803  1.1  oster 
    804  1.1  oster 
    805  1.1  oster /*
    806  1.1  oster    dst = a ^ b ^ c;
    807  1.1  oster    a may equal dst
    808  1.1  oster    see comment above longword_bxor
    809  1.1  oster */
    810  1.3  oster int
    811  1.3  oster rf_longword_bxor3(dst, a, b, c, len, bp)
    812  1.3  oster 	register unsigned long *dst;
    813  1.3  oster 	register unsigned long *a;
    814  1.3  oster 	register unsigned long *b;
    815  1.3  oster 	register unsigned long *c;
    816  1.3  oster 	int     len;		/* length in longwords */
    817  1.3  oster 	void   *bp;
    818  1.3  oster {
    819  1.3  oster 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
    820  1.3  oster 	register unsigned long *pg_a, *pg_b, *pg_c, *pg_dst;	/* per-page source/dest
    821  1.3  oster 								 * pointers */
    822  1.3  oster 	int     longs_this_time;/* # longs to xor in the current iteration */
    823  1.3  oster 	char    dst_is_a = 0;
    824  1.3  oster 
    825  1.3  oster 	REMAP_VA(bp, a, pg_a);
    826  1.3  oster 	REMAP_VA(bp, b, pg_b);
    827  1.3  oster 	REMAP_VA(bp, c, pg_c);
    828  1.3  oster 	if (a == dst) {
    829  1.3  oster 		pg_dst = pg_a;
    830  1.3  oster 		dst_is_a = 1;
    831  1.3  oster 	} else {
    832  1.3  oster 		REMAP_VA(bp, dst, pg_dst);
    833  1.3  oster 	}
    834  1.3  oster 
    835  1.3  oster 	/* align dest to cache line.  Can't cross a pg boundary on dst here. */
    836  1.3  oster 	while ((((unsigned long) pg_dst) & 0x1f)) {
    837  1.3  oster 		*pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
    838  1.3  oster 		dst++;
    839  1.3  oster 		a++;
    840  1.3  oster 		b++;
    841  1.3  oster 		c++;
    842  1.3  oster 		if (RF_PAGE_ALIGNED(a)) {
    843  1.3  oster 			REMAP_VA(bp, a, pg_a);
    844  1.3  oster 			if (!pg_a)
    845  1.3  oster 				return (EFAULT);
    846  1.3  oster 		}
    847  1.3  oster 		if (RF_PAGE_ALIGNED(b)) {
    848  1.3  oster 			REMAP_VA(bp, a, pg_b);
    849  1.3  oster 			if (!pg_b)
    850  1.3  oster 				return (EFAULT);
    851  1.3  oster 		}
    852  1.3  oster 		if (RF_PAGE_ALIGNED(c)) {
    853  1.3  oster 			REMAP_VA(bp, a, pg_c);
    854  1.3  oster 			if (!pg_c)
    855  1.3  oster 				return (EFAULT);
    856  1.3  oster 		}
    857  1.3  oster 		len--;
    858  1.3  oster 	}
    859  1.3  oster 
    860  1.3  oster 	while (len > 4) {
    861  1.3  oster 		longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT);
    862  1.3  oster 		a += longs_this_time;
    863  1.3  oster 		b += longs_this_time;
    864  1.3  oster 		c += longs_this_time;
    865  1.3  oster 		dst += longs_this_time;
    866  1.3  oster 		len -= longs_this_time;
    867  1.3  oster 		while (longs_this_time >= 4) {
    868  1.3  oster 			a0 = pg_a[0];
    869  1.3  oster 			longs_this_time -= 4;
    870  1.3  oster 
    871  1.3  oster 			a1 = pg_a[1];
    872  1.3  oster 			a2 = pg_a[2];
    873  1.3  oster 
    874  1.3  oster 			a3 = pg_a[3];
    875  1.3  oster 			pg_a += 4;
    876  1.3  oster 
    877  1.3  oster 			b0 = pg_b[0];
    878  1.3  oster 			b1 = pg_b[1];
    879  1.3  oster 
    880  1.3  oster 			b2 = pg_b[2];
    881  1.3  oster 			b3 = pg_b[3];
    882  1.3  oster 			/* start dual issue */
    883  1.3  oster 			a0 ^= b0;
    884  1.3  oster 			b0 = pg_c[0];
    885  1.3  oster 
    886  1.3  oster 			pg_b += 4;
    887  1.3  oster 			a1 ^= b1;
    888  1.3  oster 
    889  1.3  oster 			a2 ^= b2;
    890  1.3  oster 			a3 ^= b3;
    891  1.3  oster 
    892  1.3  oster 			b1 = pg_c[1];
    893  1.3  oster 			a0 ^= b0;
    894  1.3  oster 
    895  1.3  oster 			b2 = pg_c[2];
    896  1.3  oster 			a1 ^= b1;
    897  1.3  oster 
    898  1.3  oster 			b3 = pg_c[3];
    899  1.3  oster 			a2 ^= b2;
    900  1.3  oster 
    901  1.3  oster 			pg_dst[0] = a0;
    902  1.3  oster 			a3 ^= b3;
    903  1.3  oster 			pg_dst[1] = a1;
    904  1.3  oster 			pg_c += 4;
    905  1.3  oster 			pg_dst[2] = a2;
    906  1.3  oster 			pg_dst[3] = a3;
    907  1.3  oster 			pg_dst += 4;
    908  1.3  oster 		}
    909  1.3  oster 		while (longs_this_time > 0) {	/* cannot cross any page
    910  1.3  oster 						 * boundaries here */
    911  1.3  oster 			*pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
    912  1.3  oster 			longs_this_time--;
    913  1.3  oster 		}
    914  1.3  oster 
    915  1.3  oster 		if (len) {
    916  1.3  oster 			if (RF_PAGE_ALIGNED(a)) {
    917  1.3  oster 				REMAP_VA(bp, a, pg_a);
    918  1.3  oster 				if (!pg_a)
    919  1.3  oster 					return (EFAULT);
    920  1.3  oster 				if (dst_is_a)
    921  1.3  oster 					pg_dst = pg_a;
    922  1.3  oster 			}
    923  1.3  oster 			if (RF_PAGE_ALIGNED(b)) {
    924  1.3  oster 				REMAP_VA(bp, b, pg_b);
    925  1.3  oster 				if (!pg_b)
    926  1.3  oster 					return (EFAULT);
    927  1.3  oster 			}
    928  1.3  oster 			if (RF_PAGE_ALIGNED(c)) {
    929  1.3  oster 				REMAP_VA(bp, c, pg_c);
    930  1.3  oster 				if (!pg_c)
    931  1.3  oster 					return (EFAULT);
    932  1.3  oster 			}
    933  1.3  oster 			if (!dst_is_a)
    934  1.3  oster 				if (RF_PAGE_ALIGNED(dst)) {
    935  1.3  oster 					REMAP_VA(bp, dst, pg_dst);
    936  1.3  oster 					if (!pg_dst)
    937  1.3  oster 						return (EFAULT);
    938  1.3  oster 				}
    939  1.3  oster 		}
    940  1.3  oster 	}
    941  1.3  oster 	while (len) {
    942  1.3  oster 		*pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
    943  1.3  oster 		dst++;
    944  1.3  oster 		a++;
    945  1.3  oster 		b++;
    946  1.3  oster 		c++;
    947  1.3  oster 		if (RF_PAGE_ALIGNED(a)) {
    948  1.3  oster 			REMAP_VA(bp, a, pg_a);
    949  1.3  oster 			if (!pg_a)
    950  1.3  oster 				return (EFAULT);
    951  1.3  oster 			if (dst_is_a)
    952  1.3  oster 				pg_dst = pg_a;
    953  1.3  oster 		}
    954  1.3  oster 		if (RF_PAGE_ALIGNED(b)) {
    955  1.3  oster 			REMAP_VA(bp, b, pg_b);
    956  1.3  oster 			if (!pg_b)
    957  1.3  oster 				return (EFAULT);
    958  1.3  oster 		}
    959  1.3  oster 		if (RF_PAGE_ALIGNED(c)) {
    960  1.3  oster 			REMAP_VA(bp, c, pg_c);
    961  1.3  oster 			if (!pg_c)
    962  1.3  oster 				return (EFAULT);
    963  1.3  oster 		}
    964  1.3  oster 		if (!dst_is_a)
    965  1.3  oster 			if (RF_PAGE_ALIGNED(dst)) {
    966  1.3  oster 				REMAP_VA(bp, dst, pg_dst);
    967  1.3  oster 				if (!pg_dst)
    968  1.3  oster 					return (EFAULT);
    969  1.3  oster 			}
    970  1.3  oster 		len--;
    971  1.3  oster 	}
    972  1.3  oster 	return (0);
    973  1.3  oster }
    974  1.3  oster 
    975  1.3  oster int
    976  1.3  oster rf_bxor3(dst, a, b, c, len, bp)
    977  1.3  oster 	register unsigned char *dst;
    978  1.3  oster 	register unsigned char *a;
    979  1.3  oster 	register unsigned char *b;
    980  1.3  oster 	register unsigned char *c;
    981  1.3  oster 	unsigned long len;
    982  1.3  oster 	void   *bp;
    983  1.1  oster {
    984  1.3  oster 	RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0);
    985  1.1  oster 
    986  1.3  oster 	return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
    987  1.3  oster 		(unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp));
    988  1.1  oster }
    989