Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylogDiskMgr.c revision 1.7
      1  1.7  oster /*	$NetBSD: rf_paritylogDiskMgr.c,v 1.7 2000/01/08 01:18:36 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Author: William V. Courtright II
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster /* Code for flushing and reintegration operations related to parity logging.
     29  1.1  oster  *
     30  1.1  oster  */
     31  1.1  oster 
     32  1.1  oster #include "rf_archs.h"
     33  1.1  oster 
     34  1.1  oster #if RF_INCLUDE_PARITYLOGGING > 0
     35  1.1  oster 
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_mcpair.h"
#include "rf_raid.h"
#include "rf_debugMem.h"
#include "rf_dag.h"
#include "rf_dagfuncs.h"
#include "rf_desc.h"
#include "rf_layout.h"
#include "rf_diskqueue.h"
#include "rf_paritylog.h"
#include "rf_general.h"
#include "rf_etimer.h"
#include "rf_paritylogging.h"
#include "rf_engine.h"
#include "rf_dagutils.h"
#include "rf_map.h"
#include "rf_parityscan.h"
     53  1.1  oster 
     54  1.1  oster #include "rf_paritylogDiskMgr.h"
     55  1.1  oster 
     56  1.1  oster static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
     57  1.1  oster 
     58  1.3  oster static caddr_t
     59  1.3  oster AcquireReintBuffer(pool)
     60  1.3  oster 	RF_RegionBufferQueue_t *pool;
     61  1.3  oster {
     62  1.3  oster 	caddr_t bufPtr = NULL;
     63  1.3  oster 
     64  1.3  oster 	/* Return a region buffer from the free list (pool). If the free list
     65  1.3  oster 	 * is empty, WAIT. BLOCKING */
     66  1.3  oster 
     67  1.3  oster 	RF_LOCK_MUTEX(pool->mutex);
     68  1.3  oster 	if (pool->availableBuffers > 0) {
     69  1.3  oster 		bufPtr = pool->buffers[pool->availBuffersIndex];
     70  1.3  oster 		pool->availableBuffers--;
     71  1.3  oster 		pool->availBuffersIndex++;
     72  1.3  oster 		if (pool->availBuffersIndex == pool->totalBuffers)
     73  1.3  oster 			pool->availBuffersIndex = 0;
     74  1.3  oster 		RF_UNLOCK_MUTEX(pool->mutex);
     75  1.3  oster 	} else {
     76  1.3  oster 		RF_PANIC();	/* should never happen in currect config,
     77  1.3  oster 				 * single reint */
     78  1.3  oster 		RF_WAIT_COND(pool->cond, pool->mutex);
     79  1.3  oster 	}
     80  1.3  oster 	return (bufPtr);
     81  1.3  oster }
     82  1.3  oster 
     83  1.3  oster static void
     84  1.3  oster ReleaseReintBuffer(
     85  1.3  oster     RF_RegionBufferQueue_t * pool,
     86  1.3  oster     caddr_t bufPtr)
     87  1.3  oster {
     88  1.3  oster 	/* Insert a region buffer (bufPtr) into the free list (pool).
     89  1.3  oster 	 * NON-BLOCKING */
     90  1.3  oster 
     91  1.3  oster 	RF_LOCK_MUTEX(pool->mutex);
     92  1.3  oster 	pool->availableBuffers++;
     93  1.3  oster 	pool->buffers[pool->emptyBuffersIndex] = bufPtr;
     94  1.3  oster 	pool->emptyBuffersIndex++;
     95  1.3  oster 	if (pool->emptyBuffersIndex == pool->totalBuffers)
     96  1.3  oster 		pool->emptyBuffersIndex = 0;
     97  1.3  oster 	RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
     98  1.3  oster 	RF_UNLOCK_MUTEX(pool->mutex);
     99  1.3  oster 	RF_SIGNAL_COND(pool->cond);
    100  1.3  oster }
    101  1.3  oster 
    102  1.3  oster 
    103  1.1  oster 
    104  1.3  oster static void
    105  1.3  oster ReadRegionLog(
    106  1.3  oster     RF_RegionId_t regionID,
    107  1.3  oster     RF_MCPair_t * rrd_mcpair,
    108  1.3  oster     caddr_t regionBuffer,
    109  1.3  oster     RF_Raid_t * raidPtr,
    110  1.3  oster     RF_DagHeader_t ** rrd_dag_h,
    111  1.3  oster     RF_AllocListElem_t ** rrd_alloclist,
    112  1.3  oster     RF_PhysDiskAddr_t ** rrd_pda)
    113  1.3  oster {
    114  1.3  oster 	/* Initiate the read a region log from disk.  Once initiated, return
    115  1.3  oster 	 * to the calling routine.
    116  1.3  oster 	 *
    117  1.3  oster 	 * NON-BLOCKING */
    118  1.3  oster 
    119  1.3  oster 	RF_AccTraceEntry_t tracerec;
    120  1.3  oster 	RF_DagNode_t *rrd_rdNode;
    121  1.3  oster 
    122  1.3  oster 	/* create DAG to read region log from disk */
    123  1.3  oster 	rf_MakeAllocList(*rrd_alloclist);
    124  1.3  oster 	*rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    125  1.3  oster 	    "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    126  1.3  oster 
    127  1.3  oster 	/* create and initialize PDA for the core log */
    128  1.3  oster 	/* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    129  1.3  oster 	 * *)); */
    130  1.3  oster 	*rrd_pda = rf_AllocPDAList(1);
    131  1.3  oster 	rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
    132  1.3  oster 	(*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
    133  1.3  oster 
    134  1.3  oster 	if ((*rrd_pda)->next) {
    135  1.3  oster 		(*rrd_pda)->next = NULL;
    136  1.3  oster 		printf("set rrd_pda->next to NULL\n");
    137  1.3  oster 	}
    138  1.3  oster 	/* initialize DAG parameters */
    139  1.3  oster 	bzero((char *) &tracerec, sizeof(tracerec));
    140  1.3  oster 	(*rrd_dag_h)->tracerec = &tracerec;
    141  1.3  oster 	rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
    142  1.3  oster 	rrd_rdNode->params[0].p = *rrd_pda;
    143  1.1  oster /*  rrd_rdNode->params[1] = regionBuffer; */
    144  1.3  oster 	rrd_rdNode->params[2].v = 0;
    145  1.3  oster 	rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    146  1.1  oster 
    147  1.3  oster 	/* launch region log read dag */
    148  1.3  oster 	rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    149  1.3  oster 	    (void *) rrd_mcpair);
    150  1.1  oster }
    151  1.1  oster 
    152  1.1  oster 
    153  1.1  oster 
    154  1.3  oster static void
    155  1.3  oster WriteCoreLog(
    156  1.3  oster     RF_ParityLog_t * log,
    157  1.3  oster     RF_MCPair_t * fwr_mcpair,
    158  1.3  oster     RF_Raid_t * raidPtr,
    159  1.3  oster     RF_DagHeader_t ** fwr_dag_h,
    160  1.3  oster     RF_AllocListElem_t ** fwr_alloclist,
    161  1.3  oster     RF_PhysDiskAddr_t ** fwr_pda)
    162  1.3  oster {
    163  1.3  oster 	RF_RegionId_t regionID = log->regionID;
    164  1.3  oster 	RF_AccTraceEntry_t tracerec;
    165  1.3  oster 	RF_SectorNum_t regionOffset;
    166  1.3  oster 	RF_DagNode_t *fwr_wrNode;
    167  1.3  oster 
    168  1.3  oster 	/* Initiate the write of a core log to a region log disk. Once
    169  1.3  oster 	 * initiated, return to the calling routine.
    170  1.3  oster 	 *
    171  1.3  oster 	 * NON-BLOCKING */
    172  1.3  oster 
    173  1.3  oster 	/* create DAG to write a core log to a region log disk */
    174  1.3  oster 	rf_MakeAllocList(*fwr_alloclist);
    175  1.3  oster 	*fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    176  1.3  oster 	    "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    177  1.3  oster 
    178  1.3  oster 	/* create and initialize PDA for the region log */
    179  1.3  oster 	/* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    180  1.3  oster 	 * *)); */
    181  1.3  oster 	*fwr_pda = rf_AllocPDAList(1);
    182  1.3  oster 	regionOffset = log->diskOffset;
    183  1.3  oster 	rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
    184  1.3  oster 	(*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
    185  1.3  oster 
    186  1.3  oster 	/* initialize DAG parameters */
    187  1.3  oster 	bzero((char *) &tracerec, sizeof(tracerec));
    188  1.3  oster 	(*fwr_dag_h)->tracerec = &tracerec;
    189  1.3  oster 	fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
    190  1.3  oster 	fwr_wrNode->params[0].p = *fwr_pda;
    191  1.1  oster /*  fwr_wrNode->params[1] = log->bufPtr; */
    192  1.3  oster 	fwr_wrNode->params[2].v = 0;
    193  1.3  oster 	fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    194  1.3  oster 
    195  1.3  oster 	/* launch the dag to write the core log to disk */
    196  1.3  oster 	rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    197  1.3  oster 	    (void *) fwr_mcpair);
    198  1.3  oster }
    199  1.3  oster 
    200  1.3  oster 
    201  1.3  oster static void
    202  1.3  oster ReadRegionParity(
    203  1.3  oster     RF_RegionId_t regionID,
    204  1.3  oster     RF_MCPair_t * prd_mcpair,
    205  1.3  oster     caddr_t parityBuffer,
    206  1.3  oster     RF_Raid_t * raidPtr,
    207  1.3  oster     RF_DagHeader_t ** prd_dag_h,
    208  1.3  oster     RF_AllocListElem_t ** prd_alloclist,
    209  1.3  oster     RF_PhysDiskAddr_t ** prd_pda)
    210  1.3  oster {
    211  1.3  oster 	/* Initiate the read region parity from disk. Once initiated, return
    212  1.3  oster 	 * to the calling routine.
    213  1.3  oster 	 *
    214  1.3  oster 	 * NON-BLOCKING */
    215  1.3  oster 
    216  1.3  oster 	RF_AccTraceEntry_t tracerec;
    217  1.3  oster 	RF_DagNode_t *prd_rdNode;
    218  1.3  oster 
    219  1.3  oster 	/* create DAG to read region parity from disk */
    220  1.3  oster 	rf_MakeAllocList(*prd_alloclist);
    221  1.3  oster 	*prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    222  1.3  oster 	    "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    223  1.3  oster 
    224  1.3  oster 	/* create and initialize PDA for region parity */
    225  1.3  oster 	/* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    226  1.3  oster 	 * *)); */
    227  1.3  oster 	*prd_pda = rf_AllocPDAList(1);
    228  1.3  oster 	rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
    229  1.3  oster 	if (rf_parityLogDebug)
    230  1.3  oster 		printf("[reading %d sectors of parity from region %d]\n",
    231  1.3  oster 		    (int) (*prd_pda)->numSector, regionID);
    232  1.3  oster 	if ((*prd_pda)->next) {
    233  1.3  oster 		(*prd_pda)->next = NULL;
    234  1.3  oster 		printf("set prd_pda->next to NULL\n");
    235  1.3  oster 	}
    236  1.3  oster 	/* initialize DAG parameters */
    237  1.3  oster 	bzero((char *) &tracerec, sizeof(tracerec));
    238  1.3  oster 	(*prd_dag_h)->tracerec = &tracerec;
    239  1.3  oster 	prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
    240  1.3  oster 	prd_rdNode->params[0].p = *prd_pda;
    241  1.3  oster 	prd_rdNode->params[1].p = parityBuffer;
    242  1.3  oster 	prd_rdNode->params[2].v = 0;
    243  1.3  oster 	prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    244  1.3  oster 	if (rf_validateDAGDebug)
    245  1.3  oster 		rf_ValidateDAG(*prd_dag_h);
    246  1.3  oster 	/* launch region parity read dag */
    247  1.3  oster 	rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    248  1.3  oster 	    (void *) prd_mcpair);
    249  1.3  oster }
    250  1.3  oster 
    251  1.3  oster static void
    252  1.3  oster WriteRegionParity(
    253  1.3  oster     RF_RegionId_t regionID,
    254  1.3  oster     RF_MCPair_t * pwr_mcpair,
    255  1.3  oster     caddr_t parityBuffer,
    256  1.3  oster     RF_Raid_t * raidPtr,
    257  1.3  oster     RF_DagHeader_t ** pwr_dag_h,
    258  1.3  oster     RF_AllocListElem_t ** pwr_alloclist,
    259  1.3  oster     RF_PhysDiskAddr_t ** pwr_pda)
    260  1.3  oster {
    261  1.3  oster 	/* Initiate the write of region parity to disk. Once initiated, return
    262  1.3  oster 	 * to the calling routine.
    263  1.3  oster 	 *
    264  1.3  oster 	 * NON-BLOCKING */
    265  1.3  oster 
    266  1.3  oster 	RF_AccTraceEntry_t tracerec;
    267  1.3  oster 	RF_DagNode_t *pwr_wrNode;
    268  1.3  oster 
    269  1.3  oster 	/* create DAG to write region log from disk */
    270  1.3  oster 	rf_MakeAllocList(*pwr_alloclist);
    271  1.3  oster 	*pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    272  1.3  oster 	    "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    273  1.3  oster 
    274  1.3  oster 	/* create and initialize PDA for region parity */
    275  1.3  oster 	/* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    276  1.3  oster 	 * *)); */
    277  1.3  oster 	*pwr_pda = rf_AllocPDAList(1);
    278  1.3  oster 	rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
    279  1.3  oster 
    280  1.3  oster 	/* initialize DAG parameters */
    281  1.3  oster 	bzero((char *) &tracerec, sizeof(tracerec));
    282  1.3  oster 	(*pwr_dag_h)->tracerec = &tracerec;
    283  1.3  oster 	pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
    284  1.3  oster 	pwr_wrNode->params[0].p = *pwr_pda;
    285  1.1  oster /*  pwr_wrNode->params[1] = parityBuffer; */
    286  1.3  oster 	pwr_wrNode->params[2].v = 0;
    287  1.3  oster 	pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    288  1.1  oster 
    289  1.3  oster 	/* launch the dag to write region parity to disk */
    290  1.3  oster 	rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    291  1.3  oster 	    (void *) pwr_mcpair);
    292  1.3  oster }
    293  1.3  oster 
    294  1.3  oster static void
    295  1.3  oster FlushLogsToDisk(
    296  1.3  oster     RF_Raid_t * raidPtr,
    297  1.3  oster     RF_ParityLog_t * logList)
    298  1.3  oster {
    299  1.3  oster 	/* Flush a linked list of core logs to the log disk. Logs contain the
    300  1.3  oster 	 * disk location where they should be written.  Logs were written in
    301  1.3  oster 	 * FIFO order and that order must be preserved.
    302  1.3  oster 	 *
    303  1.3  oster 	 * Recommended optimizations: 1) allow multiple flushes to occur
    304  1.3  oster 	 * simultaneously 2) coalesce contiguous flush operations
    305  1.3  oster 	 *
    306  1.3  oster 	 * BLOCKING */
    307  1.3  oster 
    308  1.3  oster 	RF_ParityLog_t *log;
    309  1.3  oster 	RF_RegionId_t regionID;
    310  1.3  oster 	RF_MCPair_t *fwr_mcpair;
    311  1.3  oster 	RF_DagHeader_t *fwr_dag_h;
    312  1.3  oster 	RF_AllocListElem_t *fwr_alloclist;
    313  1.3  oster 	RF_PhysDiskAddr_t *fwr_pda;
    314  1.3  oster 
    315  1.3  oster 	fwr_mcpair = rf_AllocMCPair();
    316  1.3  oster 	RF_LOCK_MUTEX(fwr_mcpair->mutex);
    317  1.3  oster 
    318  1.3  oster 	RF_ASSERT(logList);
    319  1.3  oster 	log = logList;
    320  1.3  oster 	while (log) {
    321  1.3  oster 		regionID = log->regionID;
    322  1.3  oster 
    323  1.3  oster 		/* create and launch a DAG to write the core log */
    324  1.3  oster 		if (rf_parityLogDebug)
    325  1.3  oster 			printf("[initiating write of core log for region %d]\n", regionID);
    326  1.3  oster 		fwr_mcpair->flag = RF_FALSE;
    327  1.3  oster 		WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
    328  1.3  oster 
    329  1.3  oster 		/* wait for the DAG to complete */
    330  1.3  oster 		while (!fwr_mcpair->flag)
    331  1.3  oster 			RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
    332  1.3  oster 		if (fwr_dag_h->status != rf_enable) {
    333  1.3  oster 			RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
    334  1.3  oster 			RF_ASSERT(0);
    335  1.3  oster 		}
    336  1.3  oster 		/* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    337  1.3  oster 		rf_FreePhysDiskAddr(fwr_pda);
    338  1.3  oster 		rf_FreeDAG(fwr_dag_h);
    339  1.3  oster 		rf_FreeAllocList(fwr_alloclist);
    340  1.3  oster 
    341  1.3  oster 		log = log->next;
    342  1.3  oster 	}
    343  1.3  oster 	RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
    344  1.3  oster 	rf_FreeMCPair(fwr_mcpair);
    345  1.3  oster 	rf_ReleaseParityLogs(raidPtr, logList);
    346  1.3  oster }
    347  1.3  oster 
    348  1.3  oster static void
    349  1.3  oster ReintegrateRegion(
    350  1.3  oster     RF_Raid_t * raidPtr,
    351  1.3  oster     RF_RegionId_t regionID,
    352  1.3  oster     RF_ParityLog_t * coreLog)
    353  1.3  oster {
    354  1.3  oster 	RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
    355  1.3  oster 	RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
    356  1.3  oster 	RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
    357  1.3  oster 	RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
    358  1.3  oster 	caddr_t parityBuffer, regionBuffer = NULL;
    359  1.3  oster 
    360  1.3  oster 	/* Reintegrate a region (regionID). 1. acquire region and parity
    361  1.3  oster 	 * buffers 2. read log from disk 3. read parity from disk 4. apply log
    362  1.3  oster 	 * to parity 5. apply core log to parity 6. write new parity to disk
    363  1.3  oster 	 *
    364  1.3  oster 	 * BLOCKING */
    365  1.3  oster 
    366  1.3  oster 	if (rf_parityLogDebug)
    367  1.3  oster 		printf("[reintegrating region %d]\n", regionID);
    368  1.3  oster 
    369  1.3  oster 	/* initiate read of region parity */
    370  1.3  oster 	if (rf_parityLogDebug)
    371  1.3  oster 		printf("[initiating read of parity for region %d]\n", regionID);
    372  1.3  oster 	parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
    373  1.3  oster 	prd_mcpair = rf_AllocMCPair();
    374  1.3  oster 	RF_LOCK_MUTEX(prd_mcpair->mutex);
    375  1.3  oster 	prd_mcpair->flag = RF_FALSE;
    376  1.3  oster 	ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
    377  1.3  oster 
    378  1.3  oster 	/* if region log nonempty, initiate read */
    379  1.3  oster 	if (raidPtr->regionInfo[regionID].diskCount > 0) {
    380  1.3  oster 		if (rf_parityLogDebug)
    381  1.3  oster 			printf("[initiating read of disk log for region %d]\n", regionID);
    382  1.3  oster 		regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
    383  1.3  oster 		rrd_mcpair = rf_AllocMCPair();
    384  1.3  oster 		RF_LOCK_MUTEX(rrd_mcpair->mutex);
    385  1.3  oster 		rrd_mcpair->flag = RF_FALSE;
    386  1.3  oster 		ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
    387  1.3  oster 	}
    388  1.3  oster 	/* wait on read of region parity to complete */
    389  1.3  oster 	while (!prd_mcpair->flag) {
    390  1.3  oster 		RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
    391  1.3  oster 	}
    392  1.3  oster 	RF_UNLOCK_MUTEX(prd_mcpair->mutex);
    393  1.3  oster 	if (prd_dag_h->status != rf_enable) {
    394  1.3  oster 		RF_ERRORMSG("Unable to read parity from disk\n");
    395  1.3  oster 		/* add code to fail the parity disk */
    396  1.3  oster 		RF_ASSERT(0);
    397  1.3  oster 	}
    398  1.3  oster 	/* apply core log to parity */
    399  1.3  oster 	/* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
    400  1.3  oster 
    401  1.3  oster 	if (raidPtr->regionInfo[regionID].diskCount > 0) {
    402  1.3  oster 		/* wait on read of region log to complete */
    403  1.3  oster 		while (!rrd_mcpair->flag)
    404  1.3  oster 			RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
    405  1.3  oster 		RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
    406  1.3  oster 		if (rrd_dag_h->status != rf_enable) {
    407  1.3  oster 			RF_ERRORMSG("Unable to read region log from disk\n");
    408  1.3  oster 			/* add code to fail the log disk */
    409  1.3  oster 			RF_ASSERT(0);
    410  1.3  oster 		}
    411  1.3  oster 		/* apply region log to parity */
    412  1.3  oster 		/* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
    413  1.3  oster 		/* release resources associated with region log */
    414  1.3  oster 		/* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
    415  1.3  oster 		rf_FreePhysDiskAddr(rrd_pda);
    416  1.3  oster 		rf_FreeDAG(rrd_dag_h);
    417  1.3  oster 		rf_FreeAllocList(rrd_alloclist);
    418  1.3  oster 		rf_FreeMCPair(rrd_mcpair);
    419  1.3  oster 		ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
    420  1.3  oster 	}
    421  1.3  oster 	/* write reintegrated parity to disk */
    422  1.3  oster 	if (rf_parityLogDebug)
    423  1.3  oster 		printf("[initiating write of parity for region %d]\n", regionID);
    424  1.3  oster 	pwr_mcpair = rf_AllocMCPair();
    425  1.3  oster 	RF_LOCK_MUTEX(pwr_mcpair->mutex);
    426  1.3  oster 	pwr_mcpair->flag = RF_FALSE;
    427  1.3  oster 	WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
    428  1.3  oster 	while (!pwr_mcpair->flag)
    429  1.3  oster 		RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
    430  1.3  oster 	RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
    431  1.3  oster 	if (pwr_dag_h->status != rf_enable) {
    432  1.3  oster 		RF_ERRORMSG("Unable to write parity to disk\n");
    433  1.3  oster 		/* add code to fail the parity disk */
    434  1.3  oster 		RF_ASSERT(0);
    435  1.3  oster 	}
    436  1.3  oster 	/* release resources associated with read of old parity */
    437  1.3  oster 	/* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
    438  1.3  oster 	rf_FreePhysDiskAddr(prd_pda);
    439  1.3  oster 	rf_FreeDAG(prd_dag_h);
    440  1.3  oster 	rf_FreeAllocList(prd_alloclist);
    441  1.3  oster 	rf_FreeMCPair(prd_mcpair);
    442  1.3  oster 
    443  1.3  oster 	/* release resources associated with write of new parity */
    444  1.3  oster 	ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
    445  1.3  oster 	/* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    446  1.3  oster 	rf_FreePhysDiskAddr(pwr_pda);
    447  1.3  oster 	rf_FreeDAG(pwr_dag_h);
    448  1.3  oster 	rf_FreeAllocList(pwr_alloclist);
    449  1.3  oster 	rf_FreeMCPair(pwr_mcpair);
    450  1.3  oster 
    451  1.3  oster 	if (rf_parityLogDebug)
    452  1.3  oster 		printf("[finished reintegrating region %d]\n", regionID);
    453  1.3  oster }
    454  1.3  oster 
    455  1.3  oster 
    456  1.3  oster 
    457  1.3  oster static void
    458  1.3  oster ReintegrateLogs(
    459  1.3  oster     RF_Raid_t * raidPtr,
    460  1.3  oster     RF_ParityLog_t * logList)
    461  1.3  oster {
    462  1.3  oster 	RF_ParityLog_t *log, *freeLogList = NULL;
    463  1.3  oster 	RF_ParityLogData_t *logData, *logDataList;
    464  1.3  oster 	RF_RegionId_t regionID;
    465  1.3  oster 
    466  1.3  oster 	RF_ASSERT(logList);
    467  1.3  oster 	while (logList) {
    468  1.3  oster 		log = logList;
    469  1.3  oster 		logList = logList->next;
    470  1.3  oster 		log->next = NULL;
    471  1.3  oster 		regionID = log->regionID;
    472  1.3  oster 		ReintegrateRegion(raidPtr, regionID, log);
    473  1.3  oster 		log->numRecords = 0;
    474  1.3  oster 
    475  1.3  oster 		/* remove all items which are blocked on reintegration of this
    476  1.3  oster 		 * region */
    477  1.3  oster 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    478  1.3  oster 		logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
    479  1.3  oster 		logDataList = logData;
    480  1.3  oster 		while (logData) {
    481  1.3  oster 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
    482  1.3  oster 			logData = logData->next;
    483  1.3  oster 		}
    484  1.3  oster 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    485  1.3  oster 
    486  1.3  oster 		/* process blocked log data and clear reintInProgress flag for
    487  1.3  oster 		 * this region */
    488  1.3  oster 		if (logDataList)
    489  1.3  oster 			rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
    490  1.3  oster 		else {
    491  1.3  oster 			/* Enable flushing for this region.  Holding both
    492  1.3  oster 			 * locks provides a synchronization barrier with
    493  1.3  oster 			 * DumpParityLogToDisk */
    494  1.3  oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    495  1.3  oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    496  1.3  oster 			RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    497  1.3  oster 			raidPtr->regionInfo[regionID].diskCount = 0;
    498  1.3  oster 			raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    499  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    500  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
    501  1.3  oster 											 * enabled */
    502  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    503  1.3  oster 		}
    504  1.3  oster 		/* if log wasn't used, attach it to the list of logs to be
    505  1.3  oster 		 * returned */
    506  1.3  oster 		if (log) {
    507  1.3  oster 			log->next = freeLogList;
    508  1.3  oster 			freeLogList = log;
    509  1.3  oster 		}
    510  1.3  oster 	}
    511  1.3  oster 	if (freeLogList)
    512  1.3  oster 		rf_ReleaseParityLogs(raidPtr, freeLogList);
    513  1.3  oster }
    514  1.3  oster 
    515  1.3  oster int
    516  1.3  oster rf_ShutdownLogging(RF_Raid_t * raidPtr)
    517  1.3  oster {
    518  1.3  oster 	/* shutdown parity logging 1) disable parity logging in all regions 2)
    519  1.3  oster 	 * reintegrate all regions */
    520  1.3  oster 
    521  1.3  oster 	RF_SectorCount_t diskCount;
    522  1.3  oster 	RF_RegionId_t regionID;
    523  1.3  oster 	RF_ParityLog_t *log;
    524  1.3  oster 
    525  1.3  oster 	if (rf_parityLogDebug)
    526  1.3  oster 		printf("[shutting down parity logging]\n");
    527  1.3  oster 	/* Since parity log maps are volatile, we must reintegrate all
    528  1.3  oster 	 * regions. */
    529  1.3  oster 	if (rf_forceParityLogReint) {
    530  1.3  oster 		for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
    531  1.3  oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    532  1.3  oster 			raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
    533  1.3  oster 			log = raidPtr->regionInfo[regionID].coreLog;
    534  1.3  oster 			raidPtr->regionInfo[regionID].coreLog = NULL;
    535  1.3  oster 			diskCount = raidPtr->regionInfo[regionID].diskCount;
    536  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    537  1.3  oster 			if (diskCount > 0 || log != NULL)
    538  1.3  oster 				ReintegrateRegion(raidPtr, regionID, log);
    539  1.3  oster 			if (log != NULL)
    540  1.3  oster 				rf_ReleaseParityLogs(raidPtr, log);
    541  1.3  oster 		}
    542  1.3  oster 	}
    543  1.3  oster 	if (rf_parityLogDebug) {
    544  1.3  oster 		printf("[parity logging disabled]\n");
    545  1.3  oster 		printf("[should be done!]\n");
    546  1.3  oster 	}
    547  1.3  oster 	return (0);
    548  1.3  oster }
    549  1.3  oster 
    550  1.3  oster int
    551  1.3  oster rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr)
    552  1.3  oster {
    553  1.3  oster 	RF_ParityLog_t *reintQueue, *flushQueue;
    554  1.3  oster 	int     workNeeded, done = RF_FALSE;
    555  1.3  oster 
    556  1.3  oster 	/* Main program for parity logging disk thread.  This routine waits
    557  1.3  oster 	 * for work to appear in either the flush or reintegration queues and
    558  1.3  oster 	 * is responsible for flushing core logs to the log disk as well as
    559  1.3  oster 	 * reintegrating parity regions.
    560  1.3  oster 	 *
    561  1.3  oster 	 * BLOCKING */
    562  1.3  oster 
    563  1.3  oster 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    564  1.3  oster 
    565  1.3  oster 	/*
    566  1.3  oster          * Inform our creator that we're running. Don't bother doing the
    567  1.3  oster          * mutex lock/unlock dance- we locked above, and we'll unlock
    568  1.3  oster          * below with nothing to do, yet.
    569  1.3  oster          */
    570  1.3  oster 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
    571  1.3  oster 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    572  1.3  oster 
    573  1.3  oster 	/* empty the work queues */
    574  1.3  oster 	flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    575  1.3  oster 	raidPtr->parityLogDiskQueue.flushQueue = NULL;
    576  1.3  oster 	reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    577  1.3  oster 	raidPtr->parityLogDiskQueue.reintQueue = NULL;
    578  1.3  oster 	workNeeded = (flushQueue || reintQueue);
    579  1.3  oster 
    580  1.3  oster 	while (!done) {
    581  1.3  oster 		while (workNeeded) {
    582  1.3  oster 			/* First, flush all logs in the flush queue, freeing
    583  1.3  oster 			 * buffers Second, reintegrate all regions which are
    584  1.3  oster 			 * reported as full. Third, append queued log data
    585  1.3  oster 			 * until blocked.
    586  1.3  oster 			 *
    587  1.3  oster 			 * Note: Incoming appends (ParityLogAppend) can block on
    588  1.3  oster 			 * either 1. empty buffer pool 2. region under
    589  1.3  oster 			 * reintegration To preserve a global FIFO ordering of
    590  1.3  oster 			 * appends, buffers are not released to the world
    591  1.3  oster 			 * until those appends blocked on buffers are removed
    592  1.3  oster 			 * from the append queue.  Similarly, regions which
    593  1.3  oster 			 * are reintegrated are not opened for general use
    594  1.3  oster 			 * until the append queue has been emptied. */
    595  1.3  oster 
    596  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    597  1.3  oster 
    598  1.3  oster 			/* empty flushQueue, using free'd log buffers to
    599  1.3  oster 			 * process bufTail */
    600  1.3  oster 			if (flushQueue)
    601  1.3  oster 				FlushLogsToDisk(raidPtr, flushQueue);
    602  1.3  oster 
    603  1.3  oster 			/* empty reintQueue, flushing from reintTail as we go */
    604  1.3  oster 			if (reintQueue)
    605  1.3  oster 				ReintegrateLogs(raidPtr, reintQueue);
    606  1.3  oster 
    607  1.3  oster 			RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    608  1.3  oster 			flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    609  1.3  oster 			raidPtr->parityLogDiskQueue.flushQueue = NULL;
    610  1.3  oster 			reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    611  1.3  oster 			raidPtr->parityLogDiskQueue.reintQueue = NULL;
    612  1.3  oster 			workNeeded = (flushQueue || reintQueue);
    613  1.3  oster 		}
    614  1.3  oster 		/* no work is needed at this point */
    615  1.3  oster 		if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
    616  1.3  oster 			/* shutdown parity logging 1. disable parity logging
    617  1.3  oster 			 * in all regions 2. reintegrate all regions */
    618  1.3  oster 			done = RF_TRUE;	/* thread disabled, no work needed */
    619  1.3  oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    620  1.3  oster 			rf_ShutdownLogging(raidPtr);
    621  1.3  oster 		}
    622  1.3  oster 		if (!done) {
    623  1.3  oster 			/* thread enabled, no work needed, so sleep */
    624  1.3  oster 			if (rf_parityLogDebug)
    625  1.3  oster 				printf("[parity logging disk manager sleeping]\n");
    626  1.3  oster 			RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
    627  1.3  oster 			if (rf_parityLogDebug)
    628  1.3  oster 				printf("[parity logging disk manager just woke up]\n");
    629  1.3  oster 			flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    630  1.3  oster 			raidPtr->parityLogDiskQueue.flushQueue = NULL;
    631  1.3  oster 			reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    632  1.3  oster 			raidPtr->parityLogDiskQueue.reintQueue = NULL;
    633  1.3  oster 			workNeeded = (flushQueue || reintQueue);
    634  1.3  oster 		}
    635  1.3  oster 	}
    636  1.3  oster 	/*
    637  1.3  oster          * Announce that we're done.
    638  1.3  oster          */
    639  1.3  oster 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    640  1.3  oster 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
    641  1.3  oster 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    642  1.3  oster 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    643  1.7  oster 
    644  1.3  oster 	/*
    645  1.3  oster          * In the NetBSD kernel, the thread must exit; returning would
    646  1.3  oster          * cause the proc trampoline to attempt to return to userspace.
    647  1.3  oster          */
    648  1.3  oster 	kthread_exit(0);	/* does not return */
    649  1.1  oster }
    650  1.3  oster #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
    651