Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylogDiskMgr.c revision 1.8
      1  1.8  oster /*	$NetBSD: rf_paritylogDiskMgr.c,v 1.8 2000/01/14 01:00:26 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Author: William V. Courtright II
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster /* Code for flushing and reintegration operations related to parity logging.
     29  1.1  oster  *
     30  1.1  oster  */
     31  1.1  oster 
     32  1.1  oster #include "rf_archs.h"
     33  1.1  oster 
     34  1.1  oster #if RF_INCLUDE_PARITYLOGGING > 0
     35  1.1  oster 
     36  1.1  oster #include "rf_types.h"
     37  1.1  oster #include "rf_threadstuff.h"
     38  1.1  oster #include "rf_mcpair.h"
     39  1.1  oster #include "rf_raid.h"
     40  1.1  oster #include "rf_dag.h"
     41  1.1  oster #include "rf_dagfuncs.h"
     42  1.1  oster #include "rf_desc.h"
     43  1.1  oster #include "rf_layout.h"
     44  1.1  oster #include "rf_diskqueue.h"
     45  1.1  oster #include "rf_paritylog.h"
     46  1.1  oster #include "rf_general.h"
     47  1.1  oster #include "rf_etimer.h"
     48  1.1  oster #include "rf_paritylogging.h"
     49  1.1  oster #include "rf_engine.h"
     50  1.1  oster #include "rf_dagutils.h"
     51  1.1  oster #include "rf_map.h"
     52  1.1  oster #include "rf_parityscan.h"
     53  1.1  oster 
     54  1.1  oster #include "rf_paritylogDiskMgr.h"
     55  1.1  oster 
     56  1.1  oster static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
     57  1.1  oster 
     58  1.3  oster static caddr_t
     59  1.3  oster AcquireReintBuffer(pool)
     60  1.3  oster 	RF_RegionBufferQueue_t *pool;
     61  1.3  oster {
     62  1.3  oster 	caddr_t bufPtr = NULL;
     63  1.3  oster 
     64  1.3  oster 	/* Return a region buffer from the free list (pool). If the free list
     65  1.3  oster 	 * is empty, WAIT. BLOCKING */
     66  1.3  oster 
     67  1.3  oster 	RF_LOCK_MUTEX(pool->mutex);
     68  1.3  oster 	if (pool->availableBuffers > 0) {
     69  1.3  oster 		bufPtr = pool->buffers[pool->availBuffersIndex];
     70  1.3  oster 		pool->availableBuffers--;
     71  1.3  oster 		pool->availBuffersIndex++;
     72  1.3  oster 		if (pool->availBuffersIndex == pool->totalBuffers)
     73  1.3  oster 			pool->availBuffersIndex = 0;
     74  1.3  oster 		RF_UNLOCK_MUTEX(pool->mutex);
     75  1.3  oster 	} else {
     76  1.3  oster 		RF_PANIC();	/* should never happen in currect config,
     77  1.3  oster 				 * single reint */
     78  1.3  oster 		RF_WAIT_COND(pool->cond, pool->mutex);
     79  1.3  oster 	}
     80  1.3  oster 	return (bufPtr);
     81  1.3  oster }
     82  1.3  oster 
     83  1.3  oster static void
     84  1.3  oster ReleaseReintBuffer(
     85  1.3  oster     RF_RegionBufferQueue_t * pool,
     86  1.3  oster     caddr_t bufPtr)
     87  1.3  oster {
     88  1.3  oster 	/* Insert a region buffer (bufPtr) into the free list (pool).
     89  1.3  oster 	 * NON-BLOCKING */
     90  1.3  oster 
     91  1.3  oster 	RF_LOCK_MUTEX(pool->mutex);
     92  1.3  oster 	pool->availableBuffers++;
     93  1.3  oster 	pool->buffers[pool->emptyBuffersIndex] = bufPtr;
     94  1.3  oster 	pool->emptyBuffersIndex++;
     95  1.3  oster 	if (pool->emptyBuffersIndex == pool->totalBuffers)
     96  1.3  oster 		pool->emptyBuffersIndex = 0;
     97  1.3  oster 	RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
     98  1.3  oster 	RF_UNLOCK_MUTEX(pool->mutex);
     99  1.3  oster 	RF_SIGNAL_COND(pool->cond);
    100  1.3  oster }
    101  1.3  oster 
    102  1.3  oster 
    103  1.1  oster 
    104  1.3  oster static void
    105  1.3  oster ReadRegionLog(
    106  1.3  oster     RF_RegionId_t regionID,
    107  1.3  oster     RF_MCPair_t * rrd_mcpair,
    108  1.3  oster     caddr_t regionBuffer,
    109  1.3  oster     RF_Raid_t * raidPtr,
    110  1.3  oster     RF_DagHeader_t ** rrd_dag_h,
    111  1.3  oster     RF_AllocListElem_t ** rrd_alloclist,
    112  1.3  oster     RF_PhysDiskAddr_t ** rrd_pda)
    113  1.3  oster {
    114  1.3  oster 	/* Initiate the read a region log from disk.  Once initiated, return
    115  1.3  oster 	 * to the calling routine.
    116  1.3  oster 	 *
    117  1.3  oster 	 * NON-BLOCKING */
    118  1.3  oster 
    119  1.8  oster 	RF_AccTraceEntry_t *tracerec;
    120  1.3  oster 	RF_DagNode_t *rrd_rdNode;
    121  1.3  oster 
    122  1.3  oster 	/* create DAG to read region log from disk */
    123  1.3  oster 	rf_MakeAllocList(*rrd_alloclist);
    124  1.3  oster 	*rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    125  1.3  oster 	    "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    126  1.3  oster 
    127  1.3  oster 	/* create and initialize PDA for the core log */
    128  1.3  oster 	/* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    129  1.3  oster 	 * *)); */
    130  1.3  oster 	*rrd_pda = rf_AllocPDAList(1);
    131  1.3  oster 	rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
    132  1.3  oster 	(*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
    133  1.3  oster 
    134  1.3  oster 	if ((*rrd_pda)->next) {
    135  1.3  oster 		(*rrd_pda)->next = NULL;
    136  1.3  oster 		printf("set rrd_pda->next to NULL\n");
    137  1.3  oster 	}
    138  1.3  oster 	/* initialize DAG parameters */
    139  1.8  oster 	RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
    140  1.8  oster 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
    141  1.8  oster 	(*rrd_dag_h)->tracerec = tracerec;
    142  1.3  oster 	rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
    143  1.3  oster 	rrd_rdNode->params[0].p = *rrd_pda;
    144  1.1  oster /*  rrd_rdNode->params[1] = regionBuffer; */
    145  1.3  oster 	rrd_rdNode->params[2].v = 0;
    146  1.3  oster 	rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    147  1.1  oster 
    148  1.3  oster 	/* launch region log read dag */
    149  1.3  oster 	rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    150  1.3  oster 	    (void *) rrd_mcpair);
    151  1.1  oster }
    152  1.1  oster 
    153  1.1  oster 
    154  1.1  oster 
    155  1.3  oster static void
    156  1.3  oster WriteCoreLog(
    157  1.3  oster     RF_ParityLog_t * log,
    158  1.3  oster     RF_MCPair_t * fwr_mcpair,
    159  1.3  oster     RF_Raid_t * raidPtr,
    160  1.3  oster     RF_DagHeader_t ** fwr_dag_h,
    161  1.3  oster     RF_AllocListElem_t ** fwr_alloclist,
    162  1.3  oster     RF_PhysDiskAddr_t ** fwr_pda)
    163  1.3  oster {
    164  1.3  oster 	RF_RegionId_t regionID = log->regionID;
    165  1.8  oster 	RF_AccTraceEntry_t *tracerec;
    166  1.3  oster 	RF_SectorNum_t regionOffset;
    167  1.3  oster 	RF_DagNode_t *fwr_wrNode;
    168  1.3  oster 
    169  1.3  oster 	/* Initiate the write of a core log to a region log disk. Once
    170  1.3  oster 	 * initiated, return to the calling routine.
    171  1.3  oster 	 *
    172  1.3  oster 	 * NON-BLOCKING */
    173  1.3  oster 
    174  1.3  oster 	/* create DAG to write a core log to a region log disk */
    175  1.3  oster 	rf_MakeAllocList(*fwr_alloclist);
    176  1.3  oster 	*fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    177  1.3  oster 	    "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    178  1.3  oster 
    179  1.3  oster 	/* create and initialize PDA for the region log */
    180  1.3  oster 	/* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    181  1.3  oster 	 * *)); */
    182  1.3  oster 	*fwr_pda = rf_AllocPDAList(1);
    183  1.3  oster 	regionOffset = log->diskOffset;
    184  1.3  oster 	rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
    185  1.3  oster 	(*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
    186  1.3  oster 
    187  1.3  oster 	/* initialize DAG parameters */
    188  1.8  oster 	RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
    189  1.8  oster 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
    190  1.8  oster 	(*fwr_dag_h)->tracerec = tracerec;
    191  1.3  oster 	fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
    192  1.3  oster 	fwr_wrNode->params[0].p = *fwr_pda;
    193  1.1  oster /*  fwr_wrNode->params[1] = log->bufPtr; */
    194  1.3  oster 	fwr_wrNode->params[2].v = 0;
    195  1.3  oster 	fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    196  1.3  oster 
    197  1.3  oster 	/* launch the dag to write the core log to disk */
    198  1.3  oster 	rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    199  1.3  oster 	    (void *) fwr_mcpair);
    200  1.3  oster }
    201  1.3  oster 
    202  1.3  oster 
    203  1.3  oster static void
    204  1.3  oster ReadRegionParity(
    205  1.3  oster     RF_RegionId_t regionID,
    206  1.3  oster     RF_MCPair_t * prd_mcpair,
    207  1.3  oster     caddr_t parityBuffer,
    208  1.3  oster     RF_Raid_t * raidPtr,
    209  1.3  oster     RF_DagHeader_t ** prd_dag_h,
    210  1.3  oster     RF_AllocListElem_t ** prd_alloclist,
    211  1.3  oster     RF_PhysDiskAddr_t ** prd_pda)
    212  1.3  oster {
    213  1.3  oster 	/* Initiate the read region parity from disk. Once initiated, return
    214  1.3  oster 	 * to the calling routine.
    215  1.3  oster 	 *
    216  1.3  oster 	 * NON-BLOCKING */
    217  1.3  oster 
    218  1.8  oster 	RF_AccTraceEntry_t *tracerec;
    219  1.3  oster 	RF_DagNode_t *prd_rdNode;
    220  1.3  oster 
    221  1.3  oster 	/* create DAG to read region parity from disk */
    222  1.3  oster 	rf_MakeAllocList(*prd_alloclist);
    223  1.3  oster 	*prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    224  1.3  oster 	    "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    225  1.3  oster 
    226  1.3  oster 	/* create and initialize PDA for region parity */
    227  1.3  oster 	/* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    228  1.3  oster 	 * *)); */
    229  1.3  oster 	*prd_pda = rf_AllocPDAList(1);
    230  1.3  oster 	rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
    231  1.3  oster 	if (rf_parityLogDebug)
    232  1.3  oster 		printf("[reading %d sectors of parity from region %d]\n",
    233  1.3  oster 		    (int) (*prd_pda)->numSector, regionID);
    234  1.3  oster 	if ((*prd_pda)->next) {
    235  1.3  oster 		(*prd_pda)->next = NULL;
    236  1.3  oster 		printf("set prd_pda->next to NULL\n");
    237  1.3  oster 	}
    238  1.3  oster 	/* initialize DAG parameters */
    239  1.8  oster 	RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
    240  1.8  oster 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
    241  1.8  oster 	(*prd_dag_h)->tracerec = tracerec;
    242  1.3  oster 	prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
    243  1.3  oster 	prd_rdNode->params[0].p = *prd_pda;
    244  1.3  oster 	prd_rdNode->params[1].p = parityBuffer;
    245  1.3  oster 	prd_rdNode->params[2].v = 0;
    246  1.3  oster 	prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    247  1.3  oster 	if (rf_validateDAGDebug)
    248  1.3  oster 		rf_ValidateDAG(*prd_dag_h);
    249  1.3  oster 	/* launch region parity read dag */
    250  1.3  oster 	rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    251  1.3  oster 	    (void *) prd_mcpair);
    252  1.3  oster }
    253  1.3  oster 
    254  1.3  oster static void
    255  1.3  oster WriteRegionParity(
    256  1.3  oster     RF_RegionId_t regionID,
    257  1.3  oster     RF_MCPair_t * pwr_mcpair,
    258  1.3  oster     caddr_t parityBuffer,
    259  1.3  oster     RF_Raid_t * raidPtr,
    260  1.3  oster     RF_DagHeader_t ** pwr_dag_h,
    261  1.3  oster     RF_AllocListElem_t ** pwr_alloclist,
    262  1.3  oster     RF_PhysDiskAddr_t ** pwr_pda)
    263  1.3  oster {
    264  1.3  oster 	/* Initiate the write of region parity to disk. Once initiated, return
    265  1.3  oster 	 * to the calling routine.
    266  1.3  oster 	 *
    267  1.3  oster 	 * NON-BLOCKING */
    268  1.3  oster 
    269  1.8  oster 	RF_AccTraceEntry_t *tracerec;
    270  1.3  oster 	RF_DagNode_t *pwr_wrNode;
    271  1.3  oster 
    272  1.3  oster 	/* create DAG to write region log from disk */
    273  1.3  oster 	rf_MakeAllocList(*pwr_alloclist);
    274  1.3  oster 	*pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    275  1.3  oster 	    "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    276  1.3  oster 
    277  1.3  oster 	/* create and initialize PDA for region parity */
    278  1.3  oster 	/* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    279  1.3  oster 	 * *)); */
    280  1.3  oster 	*pwr_pda = rf_AllocPDAList(1);
    281  1.3  oster 	rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
    282  1.3  oster 
    283  1.3  oster 	/* initialize DAG parameters */
    284  1.8  oster 	RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
    285  1.8  oster 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
    286  1.8  oster 	(*pwr_dag_h)->tracerec = tracerec;
    287  1.3  oster 	pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
    288  1.3  oster 	pwr_wrNode->params[0].p = *pwr_pda;
    289  1.1  oster /*  pwr_wrNode->params[1] = parityBuffer; */
    290  1.3  oster 	pwr_wrNode->params[2].v = 0;
    291  1.3  oster 	pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    292  1.1  oster 
    293  1.3  oster 	/* launch the dag to write region parity to disk */
    294  1.3  oster 	rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    295  1.3  oster 	    (void *) pwr_mcpair);
    296  1.3  oster }
    297  1.3  oster 
    298  1.3  oster static void
    299  1.3  oster FlushLogsToDisk(
    300  1.3  oster     RF_Raid_t * raidPtr,
    301  1.3  oster     RF_ParityLog_t * logList)
    302  1.3  oster {
    303  1.3  oster 	/* Flush a linked list of core logs to the log disk. Logs contain the
    304  1.3  oster 	 * disk location where they should be written.  Logs were written in
    305  1.3  oster 	 * FIFO order and that order must be preserved.
    306  1.3  oster 	 *
    307  1.3  oster 	 * Recommended optimizations: 1) allow multiple flushes to occur
    308  1.3  oster 	 * simultaneously 2) coalesce contiguous flush operations
    309  1.3  oster 	 *
    310  1.3  oster 	 * BLOCKING */
    311  1.3  oster 
    312  1.3  oster 	RF_ParityLog_t *log;
    313  1.3  oster 	RF_RegionId_t regionID;
    314  1.3  oster 	RF_MCPair_t *fwr_mcpair;
    315  1.3  oster 	RF_DagHeader_t *fwr_dag_h;
    316  1.3  oster 	RF_AllocListElem_t *fwr_alloclist;
    317  1.3  oster 	RF_PhysDiskAddr_t *fwr_pda;
    318  1.3  oster 
    319  1.3  oster 	fwr_mcpair = rf_AllocMCPair();
    320  1.3  oster 	RF_LOCK_MUTEX(fwr_mcpair->mutex);
    321  1.3  oster 
    322  1.3  oster 	RF_ASSERT(logList);
    323  1.3  oster 	log = logList;
    324  1.3  oster 	while (log) {
    325  1.3  oster 		regionID = log->regionID;
    326  1.3  oster 
    327  1.3  oster 		/* create and launch a DAG to write the core log */
    328  1.3  oster 		if (rf_parityLogDebug)
    329  1.3  oster 			printf("[initiating write of core log for region %d]\n", regionID);
    330  1.3  oster 		fwr_mcpair->flag = RF_FALSE;
    331  1.3  oster 		WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
    332  1.3  oster 
    333  1.3  oster 		/* wait for the DAG to complete */
    334  1.3  oster 		while (!fwr_mcpair->flag)
    335  1.3  oster 			RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
    336  1.3  oster 		if (fwr_dag_h->status != rf_enable) {
    337  1.3  oster 			RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
    338  1.3  oster 			RF_ASSERT(0);
    339  1.3  oster 		}
    340  1.3  oster 		/* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    341  1.3  oster 		rf_FreePhysDiskAddr(fwr_pda);
    342  1.3  oster 		rf_FreeDAG(fwr_dag_h);
    343  1.3  oster 		rf_FreeAllocList(fwr_alloclist);
    344  1.3  oster 
    345  1.3  oster 		log = log->next;
    346  1.3  oster 	}
    347  1.3  oster 	RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
    348  1.3  oster 	rf_FreeMCPair(fwr_mcpair);
    349  1.3  oster 	rf_ReleaseParityLogs(raidPtr, logList);
    350  1.3  oster }
    351  1.3  oster 
    352  1.3  oster static void
    353  1.3  oster ReintegrateRegion(
    354  1.3  oster     RF_Raid_t * raidPtr,
    355  1.3  oster     RF_RegionId_t regionID,
    356  1.3  oster     RF_ParityLog_t * coreLog)
    357  1.3  oster {
    358  1.3  oster 	RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
    359  1.3  oster 	RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
    360  1.3  oster 	RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
    361  1.3  oster 	RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
    362  1.3  oster 	caddr_t parityBuffer, regionBuffer = NULL;
    363  1.3  oster 
    364  1.3  oster 	/* Reintegrate a region (regionID). 1. acquire region and parity
    365  1.3  oster 	 * buffers 2. read log from disk 3. read parity from disk 4. apply log
    366  1.3  oster 	 * to parity 5. apply core log to parity 6. write new parity to disk
    367  1.3  oster 	 *
    368  1.3  oster 	 * BLOCKING */
    369  1.3  oster 
    370  1.3  oster 	if (rf_parityLogDebug)
    371  1.3  oster 		printf("[reintegrating region %d]\n", regionID);
    372  1.3  oster 
    373  1.3  oster 	/* initiate read of region parity */
    374  1.3  oster 	if (rf_parityLogDebug)
    375  1.3  oster 		printf("[initiating read of parity for region %d]\n", regionID);
    376  1.3  oster 	parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
    377  1.3  oster 	prd_mcpair = rf_AllocMCPair();
    378  1.3  oster 	RF_LOCK_MUTEX(prd_mcpair->mutex);
    379  1.3  oster 	prd_mcpair->flag = RF_FALSE;
    380  1.3  oster 	ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
    381  1.3  oster 
    382  1.3  oster 	/* if region log nonempty, initiate read */
    383  1.3  oster 	if (raidPtr->regionInfo[regionID].diskCount > 0) {
    384  1.3  oster 		if (rf_parityLogDebug)
    385  1.3  oster 			printf("[initiating read of disk log for region %d]\n", regionID);
    386  1.3  oster 		regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
    387  1.3  oster 		rrd_mcpair = rf_AllocMCPair();
    388  1.3  oster 		RF_LOCK_MUTEX(rrd_mcpair->mutex);
    389  1.3  oster 		rrd_mcpair->flag = RF_FALSE;
    390  1.3  oster 		ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
    391  1.3  oster 	}
    392  1.3  oster 	/* wait on read of region parity to complete */
    393  1.3  oster 	while (!prd_mcpair->flag) {
    394  1.3  oster 		RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
    395  1.3  oster 	}
    396  1.3  oster 	RF_UNLOCK_MUTEX(prd_mcpair->mutex);
    397  1.3  oster 	if (prd_dag_h->status != rf_enable) {
    398  1.3  oster 		RF_ERRORMSG("Unable to read parity from disk\n");
    399  1.3  oster 		/* add code to fail the parity disk */
    400  1.3  oster 		RF_ASSERT(0);
    401  1.3  oster 	}
    402  1.3  oster 	/* apply core log to parity */
    403  1.3  oster 	/* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
    404  1.3  oster 
    405  1.3  oster 	if (raidPtr->regionInfo[regionID].diskCount > 0) {
    406  1.3  oster 		/* wait on read of region log to complete */
    407  1.3  oster 		while (!rrd_mcpair->flag)
    408  1.3  oster 			RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
    409  1.3  oster 		RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
    410  1.3  oster 		if (rrd_dag_h->status != rf_enable) {
    411  1.3  oster 			RF_ERRORMSG("Unable to read region log from disk\n");
    412  1.3  oster 			/* add code to fail the log disk */
    413  1.3  oster 			RF_ASSERT(0);
    414  1.3  oster 		}
    415  1.3  oster 		/* apply region log to parity */
    416  1.3  oster 		/* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
    417  1.3  oster 		/* release resources associated with region log */
    418  1.3  oster 		/* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
    419  1.3  oster 		rf_FreePhysDiskAddr(rrd_pda);
    420  1.3  oster 		rf_FreeDAG(rrd_dag_h);
    421  1.3  oster 		rf_FreeAllocList(rrd_alloclist);
    422  1.3  oster 		rf_FreeMCPair(rrd_mcpair);
    423  1.3  oster 		ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
    424  1.3  oster 	}
    425  1.3  oster 	/* write reintegrated parity to disk */
    426  1.3  oster 	if (rf_parityLogDebug)
    427  1.3  oster 		printf("[initiating write of parity for region %d]\n", regionID);
    428  1.3  oster 	pwr_mcpair = rf_AllocMCPair();
    429  1.3  oster 	RF_LOCK_MUTEX(pwr_mcpair->mutex);
    430  1.3  oster 	pwr_mcpair->flag = RF_FALSE;
    431  1.3  oster 	WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
    432  1.3  oster 	while (!pwr_mcpair->flag)
    433  1.3  oster 		RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
    434  1.3  oster 	RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
    435  1.3  oster 	if (pwr_dag_h->status != rf_enable) {
    436  1.3  oster 		RF_ERRORMSG("Unable to write parity to disk\n");
    437  1.3  oster 		/* add code to fail the parity disk */
    438  1.3  oster 		RF_ASSERT(0);
    439  1.3  oster 	}
    440  1.3  oster 	/* release resources associated with read of old parity */
    441  1.3  oster 	/* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
    442  1.3  oster 	rf_FreePhysDiskAddr(prd_pda);
    443  1.3  oster 	rf_FreeDAG(prd_dag_h);
    444  1.3  oster 	rf_FreeAllocList(prd_alloclist);
    445  1.3  oster 	rf_FreeMCPair(prd_mcpair);
    446  1.3  oster 
    447  1.3  oster 	/* release resources associated with write of new parity */
    448  1.3  oster 	ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
    449  1.3  oster 	/* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    450  1.3  oster 	rf_FreePhysDiskAddr(pwr_pda);
    451  1.3  oster 	rf_FreeDAG(pwr_dag_h);
    452  1.3  oster 	rf_FreeAllocList(pwr_alloclist);
    453  1.3  oster 	rf_FreeMCPair(pwr_mcpair);
    454  1.3  oster 
    455  1.3  oster 	if (rf_parityLogDebug)
    456  1.3  oster 		printf("[finished reintegrating region %d]\n", regionID);
    457  1.3  oster }
    458  1.3  oster 
    459  1.3  oster 
    460  1.3  oster 
    461  1.3  oster static void
    462  1.3  oster ReintegrateLogs(
    463  1.3  oster     RF_Raid_t * raidPtr,
    464  1.3  oster     RF_ParityLog_t * logList)
    465  1.3  oster {
    466  1.3  oster 	RF_ParityLog_t *log, *freeLogList = NULL;
    467  1.3  oster 	RF_ParityLogData_t *logData, *logDataList;
    468  1.3  oster 	RF_RegionId_t regionID;
    469  1.3  oster 
    470  1.3  oster 	RF_ASSERT(logList);
    471  1.3  oster 	while (logList) {
    472  1.3  oster 		log = logList;
    473  1.3  oster 		logList = logList->next;
    474  1.3  oster 		log->next = NULL;
    475  1.3  oster 		regionID = log->regionID;
    476  1.3  oster 		ReintegrateRegion(raidPtr, regionID, log);
    477  1.3  oster 		log->numRecords = 0;
    478  1.3  oster 
    479  1.3  oster 		/* remove all items which are blocked on reintegration of this
    480  1.3  oster 		 * region */
    481  1.3  oster 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    482  1.3  oster 		logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
    483  1.3  oster 		logDataList = logData;
    484  1.3  oster 		while (logData) {
    485  1.3  oster 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
    486  1.3  oster 			logData = logData->next;
    487  1.3  oster 		}
    488  1.3  oster 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    489  1.3  oster 
    490  1.3  oster 		/* process blocked log data and clear reintInProgress flag for
    491  1.3  oster 		 * this region */
    492  1.3  oster 		if (logDataList)
    493  1.3  oster 			rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
    494  1.3  oster 		else {
    495  1.3  oster 			/* Enable flushing for this region.  Holding both
    496  1.3  oster 			 * locks provides a synchronization barrier with
    497  1.3  oster 			 * DumpParityLogToDisk */
    498  1.3  oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    499  1.3  oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    500  1.3  oster 			RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    501  1.3  oster 			raidPtr->regionInfo[regionID].diskCount = 0;
    502  1.3  oster 			raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    503  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    504  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
    505  1.3  oster 											 * enabled */
    506  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    507  1.3  oster 		}
    508  1.3  oster 		/* if log wasn't used, attach it to the list of logs to be
    509  1.3  oster 		 * returned */
    510  1.3  oster 		if (log) {
    511  1.3  oster 			log->next = freeLogList;
    512  1.3  oster 			freeLogList = log;
    513  1.3  oster 		}
    514  1.3  oster 	}
    515  1.3  oster 	if (freeLogList)
    516  1.3  oster 		rf_ReleaseParityLogs(raidPtr, freeLogList);
    517  1.3  oster }
    518  1.3  oster 
    519  1.3  oster int
    520  1.3  oster rf_ShutdownLogging(RF_Raid_t * raidPtr)
    521  1.3  oster {
    522  1.3  oster 	/* shutdown parity logging 1) disable parity logging in all regions 2)
    523  1.3  oster 	 * reintegrate all regions */
    524  1.3  oster 
    525  1.3  oster 	RF_SectorCount_t diskCount;
    526  1.3  oster 	RF_RegionId_t regionID;
    527  1.3  oster 	RF_ParityLog_t *log;
    528  1.3  oster 
    529  1.3  oster 	if (rf_parityLogDebug)
    530  1.3  oster 		printf("[shutting down parity logging]\n");
    531  1.3  oster 	/* Since parity log maps are volatile, we must reintegrate all
    532  1.3  oster 	 * regions. */
    533  1.3  oster 	if (rf_forceParityLogReint) {
    534  1.3  oster 		for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
    535  1.3  oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    536  1.3  oster 			raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
    537  1.3  oster 			log = raidPtr->regionInfo[regionID].coreLog;
    538  1.3  oster 			raidPtr->regionInfo[regionID].coreLog = NULL;
    539  1.3  oster 			diskCount = raidPtr->regionInfo[regionID].diskCount;
    540  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    541  1.3  oster 			if (diskCount > 0 || log != NULL)
    542  1.3  oster 				ReintegrateRegion(raidPtr, regionID, log);
    543  1.3  oster 			if (log != NULL)
    544  1.3  oster 				rf_ReleaseParityLogs(raidPtr, log);
    545  1.3  oster 		}
    546  1.3  oster 	}
    547  1.3  oster 	if (rf_parityLogDebug) {
    548  1.3  oster 		printf("[parity logging disabled]\n");
    549  1.3  oster 		printf("[should be done!]\n");
    550  1.3  oster 	}
    551  1.3  oster 	return (0);
    552  1.3  oster }
    553  1.3  oster 
    554  1.3  oster int
    555  1.3  oster rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr)
    556  1.3  oster {
    557  1.3  oster 	RF_ParityLog_t *reintQueue, *flushQueue;
    558  1.3  oster 	int     workNeeded, done = RF_FALSE;
    559  1.8  oster 	int s;
    560  1.3  oster 
    561  1.3  oster 	/* Main program for parity logging disk thread.  This routine waits
    562  1.3  oster 	 * for work to appear in either the flush or reintegration queues and
    563  1.3  oster 	 * is responsible for flushing core logs to the log disk as well as
    564  1.3  oster 	 * reintegrating parity regions.
    565  1.3  oster 	 *
    566  1.3  oster 	 * BLOCKING */
    567  1.3  oster 
    568  1.8  oster 	s = splbio();
    569  1.8  oster 
    570  1.3  oster 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    571  1.3  oster 
    572  1.3  oster 	/*
    573  1.3  oster          * Inform our creator that we're running. Don't bother doing the
    574  1.3  oster          * mutex lock/unlock dance- we locked above, and we'll unlock
    575  1.3  oster          * below with nothing to do, yet.
    576  1.3  oster          */
    577  1.3  oster 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
    578  1.3  oster 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    579  1.3  oster 
    580  1.3  oster 	/* empty the work queues */
    581  1.3  oster 	flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    582  1.3  oster 	raidPtr->parityLogDiskQueue.flushQueue = NULL;
    583  1.3  oster 	reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    584  1.3  oster 	raidPtr->parityLogDiskQueue.reintQueue = NULL;
    585  1.3  oster 	workNeeded = (flushQueue || reintQueue);
    586  1.3  oster 
    587  1.3  oster 	while (!done) {
    588  1.3  oster 		while (workNeeded) {
    589  1.3  oster 			/* First, flush all logs in the flush queue, freeing
    590  1.3  oster 			 * buffers Second, reintegrate all regions which are
    591  1.3  oster 			 * reported as full. Third, append queued log data
    592  1.3  oster 			 * until blocked.
    593  1.3  oster 			 *
    594  1.3  oster 			 * Note: Incoming appends (ParityLogAppend) can block on
    595  1.3  oster 			 * either 1. empty buffer pool 2. region under
    596  1.3  oster 			 * reintegration To preserve a global FIFO ordering of
    597  1.3  oster 			 * appends, buffers are not released to the world
    598  1.3  oster 			 * until those appends blocked on buffers are removed
    599  1.3  oster 			 * from the append queue.  Similarly, regions which
    600  1.3  oster 			 * are reintegrated are not opened for general use
    601  1.3  oster 			 * until the append queue has been emptied. */
    602  1.3  oster 
    603  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    604  1.3  oster 
    605  1.3  oster 			/* empty flushQueue, using free'd log buffers to
    606  1.3  oster 			 * process bufTail */
    607  1.3  oster 			if (flushQueue)
    608  1.8  oster 			       FlushLogsToDisk(raidPtr, flushQueue);
    609  1.3  oster 
    610  1.3  oster 			/* empty reintQueue, flushing from reintTail as we go */
    611  1.3  oster 			if (reintQueue)
    612  1.3  oster 				ReintegrateLogs(raidPtr, reintQueue);
    613  1.3  oster 
    614  1.3  oster 			RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    615  1.3  oster 			flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    616  1.3  oster 			raidPtr->parityLogDiskQueue.flushQueue = NULL;
    617  1.3  oster 			reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    618  1.3  oster 			raidPtr->parityLogDiskQueue.reintQueue = NULL;
    619  1.3  oster 			workNeeded = (flushQueue || reintQueue);
    620  1.3  oster 		}
    621  1.3  oster 		/* no work is needed at this point */
    622  1.3  oster 		if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
    623  1.3  oster 			/* shutdown parity logging 1. disable parity logging
    624  1.3  oster 			 * in all regions 2. reintegrate all regions */
    625  1.3  oster 			done = RF_TRUE;	/* thread disabled, no work needed */
    626  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    627  1.3  oster 			rf_ShutdownLogging(raidPtr);
    628  1.3  oster 		}
    629  1.3  oster 		if (!done) {
    630  1.3  oster 			/* thread enabled, no work needed, so sleep */
    631  1.3  oster 			if (rf_parityLogDebug)
    632  1.3  oster 				printf("[parity logging disk manager sleeping]\n");
    633  1.3  oster 			RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
    634  1.3  oster 			if (rf_parityLogDebug)
    635  1.3  oster 				printf("[parity logging disk manager just woke up]\n");
    636  1.3  oster 			flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    637  1.3  oster 			raidPtr->parityLogDiskQueue.flushQueue = NULL;
    638  1.3  oster 			reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    639  1.3  oster 			raidPtr->parityLogDiskQueue.reintQueue = NULL;
    640  1.3  oster 			workNeeded = (flushQueue || reintQueue);
    641  1.3  oster 		}
    642  1.3  oster 	}
    643  1.3  oster 	/*
    644  1.3  oster          * Announce that we're done.
    645  1.3  oster          */
    646  1.3  oster 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    647  1.3  oster 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
    648  1.3  oster 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    649  1.3  oster 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    650  1.8  oster 
    651  1.8  oster 	splx(s);
    652  1.7  oster 
    653  1.3  oster 	/*
    654  1.3  oster          * In the NetBSD kernel, the thread must exit; returning would
    655  1.3  oster          * cause the proc trampoline to attempt to return to userspace.
    656  1.3  oster          */
    657  1.3  oster 	kthread_exit(0);	/* does not return */
    658  1.1  oster }
    659  1.3  oster #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
    660