Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylogDiskMgr.c revision 1.4.2.1
      1  1.4.2.1  bouyer /*	$NetBSD: rf_paritylogDiskMgr.c,v 1.4.2.1 2000/11/20 11:42:56 bouyer Exp $	*/
      2      1.1   oster /*
      3      1.1   oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4      1.1   oster  * All rights reserved.
      5      1.1   oster  *
      6      1.1   oster  * Author: William V. Courtright II
      7      1.1   oster  *
      8      1.1   oster  * Permission to use, copy, modify and distribute this software and
      9      1.1   oster  * its documentation is hereby granted, provided that both the copyright
     10      1.1   oster  * notice and this permission notice appear in all copies of the
     11      1.1   oster  * software, derivative works or modified versions, and any portions
     12      1.1   oster  * thereof, and that both notices appear in supporting documentation.
     13      1.1   oster  *
     14      1.1   oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15      1.1   oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16      1.1   oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17      1.1   oster  *
     18      1.1   oster  * Carnegie Mellon requests users of this software to return to
     19      1.1   oster  *
     20      1.1   oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21      1.1   oster  *  School of Computer Science
     22      1.1   oster  *  Carnegie Mellon University
     23      1.1   oster  *  Pittsburgh PA 15213-3890
     24      1.1   oster  *
     25      1.1   oster  * any improvements or extensions that they make and grant Carnegie the
     26      1.1   oster  * rights to redistribute these changes.
     27      1.1   oster  */
     28      1.1   oster /* Code for flushing and reintegration operations related to parity logging.
     29      1.1   oster  *
     30      1.1   oster  */
     31      1.1   oster 
     32      1.1   oster #include "rf_archs.h"
     33      1.1   oster 
     34      1.1   oster #if RF_INCLUDE_PARITYLOGGING > 0
     35      1.1   oster 
     36      1.1   oster #include "rf_types.h"
     37      1.1   oster #include "rf_threadstuff.h"
     38      1.1   oster #include "rf_mcpair.h"
     39      1.1   oster #include "rf_raid.h"
     40      1.1   oster #include "rf_dag.h"
     41      1.1   oster #include "rf_dagfuncs.h"
     42      1.1   oster #include "rf_desc.h"
     43      1.1   oster #include "rf_layout.h"
     44      1.1   oster #include "rf_diskqueue.h"
     45      1.1   oster #include "rf_paritylog.h"
     46      1.1   oster #include "rf_general.h"
     47      1.1   oster #include "rf_etimer.h"
     48      1.1   oster #include "rf_paritylogging.h"
     49      1.1   oster #include "rf_engine.h"
     50      1.1   oster #include "rf_dagutils.h"
     51      1.1   oster #include "rf_map.h"
     52      1.1   oster #include "rf_parityscan.h"
     53      1.1   oster 
     54      1.1   oster #include "rf_paritylogDiskMgr.h"
     55      1.1   oster 
     56      1.1   oster static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
     57      1.1   oster 
     58      1.3   oster static caddr_t
     59      1.3   oster AcquireReintBuffer(pool)
     60      1.3   oster 	RF_RegionBufferQueue_t *pool;
     61      1.3   oster {
     62      1.3   oster 	caddr_t bufPtr = NULL;
     63      1.3   oster 
     64      1.3   oster 	/* Return a region buffer from the free list (pool). If the free list
     65      1.3   oster 	 * is empty, WAIT. BLOCKING */
     66      1.3   oster 
     67      1.3   oster 	RF_LOCK_MUTEX(pool->mutex);
     68      1.3   oster 	if (pool->availableBuffers > 0) {
     69      1.3   oster 		bufPtr = pool->buffers[pool->availBuffersIndex];
     70      1.3   oster 		pool->availableBuffers--;
     71      1.3   oster 		pool->availBuffersIndex++;
     72      1.3   oster 		if (pool->availBuffersIndex == pool->totalBuffers)
     73      1.3   oster 			pool->availBuffersIndex = 0;
     74      1.3   oster 		RF_UNLOCK_MUTEX(pool->mutex);
     75      1.3   oster 	} else {
     76  1.4.2.1  bouyer 		RF_PANIC();	/* should never happen in correct config,
     77      1.3   oster 				 * single reint */
     78      1.3   oster 		RF_WAIT_COND(pool->cond, pool->mutex);
     79      1.3   oster 	}
     80      1.3   oster 	return (bufPtr);
     81      1.3   oster }
     82      1.3   oster 
     83      1.3   oster static void
     84      1.3   oster ReleaseReintBuffer(
     85      1.3   oster     RF_RegionBufferQueue_t * pool,
     86      1.3   oster     caddr_t bufPtr)
     87      1.3   oster {
     88      1.3   oster 	/* Insert a region buffer (bufPtr) into the free list (pool).
     89      1.3   oster 	 * NON-BLOCKING */
     90      1.3   oster 
     91      1.3   oster 	RF_LOCK_MUTEX(pool->mutex);
     92      1.3   oster 	pool->availableBuffers++;
     93      1.3   oster 	pool->buffers[pool->emptyBuffersIndex] = bufPtr;
     94      1.3   oster 	pool->emptyBuffersIndex++;
     95      1.3   oster 	if (pool->emptyBuffersIndex == pool->totalBuffers)
     96      1.3   oster 		pool->emptyBuffersIndex = 0;
     97      1.3   oster 	RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
     98      1.3   oster 	RF_UNLOCK_MUTEX(pool->mutex);
     99      1.3   oster 	RF_SIGNAL_COND(pool->cond);
    100      1.3   oster }
    101      1.3   oster 
    102      1.3   oster 
    103      1.1   oster 
    104      1.3   oster static void
    105      1.3   oster ReadRegionLog(
    106      1.3   oster     RF_RegionId_t regionID,
    107      1.3   oster     RF_MCPair_t * rrd_mcpair,
    108      1.3   oster     caddr_t regionBuffer,
    109      1.3   oster     RF_Raid_t * raidPtr,
    110      1.3   oster     RF_DagHeader_t ** rrd_dag_h,
    111      1.3   oster     RF_AllocListElem_t ** rrd_alloclist,
    112      1.3   oster     RF_PhysDiskAddr_t ** rrd_pda)
    113      1.3   oster {
    114      1.3   oster 	/* Initiate the read a region log from disk.  Once initiated, return
    115      1.3   oster 	 * to the calling routine.
    116      1.3   oster 	 *
    117      1.3   oster 	 * NON-BLOCKING */
    118      1.3   oster 
    119  1.4.2.1  bouyer 	RF_AccTraceEntry_t *tracerec;
    120      1.3   oster 	RF_DagNode_t *rrd_rdNode;
    121      1.3   oster 
    122      1.3   oster 	/* create DAG to read region log from disk */
    123      1.3   oster 	rf_MakeAllocList(*rrd_alloclist);
    124  1.4.2.1  bouyer 	*rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer,
    125  1.4.2.1  bouyer 				      rf_DiskReadFunc, rf_DiskReadUndoFunc,
    126  1.4.2.1  bouyer 				      "Rrl", *rrd_alloclist,
    127  1.4.2.1  bouyer 				      RF_DAG_FLAGS_NONE,
    128  1.4.2.1  bouyer 				      RF_IO_NORMAL_PRIORITY);
    129      1.3   oster 
    130      1.3   oster 	/* create and initialize PDA for the core log */
    131      1.3   oster 	/* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    132      1.3   oster 	 * *)); */
    133      1.3   oster 	*rrd_pda = rf_AllocPDAList(1);
    134  1.4.2.1  bouyer 	rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row),
    135  1.4.2.1  bouyer 			       &((*rrd_pda)->col), &((*rrd_pda)->startSector));
    136      1.3   oster 	(*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
    137      1.3   oster 
    138      1.3   oster 	if ((*rrd_pda)->next) {
    139      1.3   oster 		(*rrd_pda)->next = NULL;
    140      1.3   oster 		printf("set rrd_pda->next to NULL\n");
    141      1.3   oster 	}
    142      1.3   oster 	/* initialize DAG parameters */
    143  1.4.2.1  bouyer 	RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
    144  1.4.2.1  bouyer 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
    145  1.4.2.1  bouyer 	(*rrd_dag_h)->tracerec = tracerec;
    146      1.3   oster 	rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
    147      1.3   oster 	rrd_rdNode->params[0].p = *rrd_pda;
    148      1.1   oster /*  rrd_rdNode->params[1] = regionBuffer; */
    149      1.3   oster 	rrd_rdNode->params[2].v = 0;
    150  1.4.2.1  bouyer 	rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    151  1.4.2.1  bouyer 						   0, 0, 0);
    152      1.1   oster 
    153      1.3   oster 	/* launch region log read dag */
    154      1.3   oster 	rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    155      1.3   oster 	    (void *) rrd_mcpair);
    156      1.1   oster }
    157      1.1   oster 
    158      1.1   oster 
    159      1.1   oster 
    160      1.3   oster static void
    161      1.3   oster WriteCoreLog(
    162      1.3   oster     RF_ParityLog_t * log,
    163      1.3   oster     RF_MCPair_t * fwr_mcpair,
    164      1.3   oster     RF_Raid_t * raidPtr,
    165      1.3   oster     RF_DagHeader_t ** fwr_dag_h,
    166      1.3   oster     RF_AllocListElem_t ** fwr_alloclist,
    167      1.3   oster     RF_PhysDiskAddr_t ** fwr_pda)
    168      1.3   oster {
    169      1.3   oster 	RF_RegionId_t regionID = log->regionID;
    170  1.4.2.1  bouyer 	RF_AccTraceEntry_t *tracerec;
    171      1.3   oster 	RF_SectorNum_t regionOffset;
    172      1.3   oster 	RF_DagNode_t *fwr_wrNode;
    173      1.3   oster 
    174      1.3   oster 	/* Initiate the write of a core log to a region log disk. Once
    175      1.3   oster 	 * initiated, return to the calling routine.
    176      1.3   oster 	 *
    177      1.3   oster 	 * NON-BLOCKING */
    178      1.3   oster 
    179      1.3   oster 	/* create DAG to write a core log to a region log disk */
    180      1.3   oster 	rf_MakeAllocList(*fwr_alloclist);
    181  1.4.2.1  bouyer 	*fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr,
    182  1.4.2.1  bouyer 				      rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    183      1.3   oster 	    "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    184      1.3   oster 
    185      1.3   oster 	/* create and initialize PDA for the region log */
    186      1.3   oster 	/* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    187      1.3   oster 	 * *)); */
    188      1.3   oster 	*fwr_pda = rf_AllocPDAList(1);
    189      1.3   oster 	regionOffset = log->diskOffset;
    190  1.4.2.1  bouyer 	rf_MapLogParityLogging(raidPtr, regionID, regionOffset,
    191  1.4.2.1  bouyer 			       &((*fwr_pda)->row), &((*fwr_pda)->col),
    192  1.4.2.1  bouyer 			       &((*fwr_pda)->startSector));
    193      1.3   oster 	(*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
    194      1.3   oster 
    195      1.3   oster 	/* initialize DAG parameters */
    196  1.4.2.1  bouyer 	RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
    197  1.4.2.1  bouyer 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
    198  1.4.2.1  bouyer 	(*fwr_dag_h)->tracerec = tracerec;
    199      1.3   oster 	fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
    200      1.3   oster 	fwr_wrNode->params[0].p = *fwr_pda;
    201      1.1   oster /*  fwr_wrNode->params[1] = log->bufPtr; */
    202      1.3   oster 	fwr_wrNode->params[2].v = 0;
    203  1.4.2.1  bouyer 	fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    204  1.4.2.1  bouyer 						   0, 0, 0);
    205      1.3   oster 
    206      1.3   oster 	/* launch the dag to write the core log to disk */
    207      1.3   oster 	rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    208      1.3   oster 	    (void *) fwr_mcpair);
    209      1.3   oster }
    210      1.3   oster 
    211      1.3   oster 
    212      1.3   oster static void
    213      1.3   oster ReadRegionParity(
    214      1.3   oster     RF_RegionId_t regionID,
    215      1.3   oster     RF_MCPair_t * prd_mcpair,
    216      1.3   oster     caddr_t parityBuffer,
    217      1.3   oster     RF_Raid_t * raidPtr,
    218      1.3   oster     RF_DagHeader_t ** prd_dag_h,
    219      1.3   oster     RF_AllocListElem_t ** prd_alloclist,
    220      1.3   oster     RF_PhysDiskAddr_t ** prd_pda)
    221      1.3   oster {
    222      1.3   oster 	/* Initiate the read region parity from disk. Once initiated, return
    223      1.3   oster 	 * to the calling routine.
    224      1.3   oster 	 *
    225      1.3   oster 	 * NON-BLOCKING */
    226      1.3   oster 
    227  1.4.2.1  bouyer 	RF_AccTraceEntry_t *tracerec;
    228      1.3   oster 	RF_DagNode_t *prd_rdNode;
    229      1.3   oster 
    230      1.3   oster 	/* create DAG to read region parity from disk */
    231      1.3   oster 	rf_MakeAllocList(*prd_alloclist);
    232  1.4.2.1  bouyer 	*prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc,
    233  1.4.2.1  bouyer 				      rf_DiskReadUndoFunc, "Rrp",
    234  1.4.2.1  bouyer 				      *prd_alloclist, RF_DAG_FLAGS_NONE,
    235  1.4.2.1  bouyer 				      RF_IO_NORMAL_PRIORITY);
    236      1.3   oster 
    237      1.3   oster 	/* create and initialize PDA for region parity */
    238      1.3   oster 	/* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    239      1.3   oster 	 * *)); */
    240      1.3   oster 	*prd_pda = rf_AllocPDAList(1);
    241  1.4.2.1  bouyer 	rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row),
    242  1.4.2.1  bouyer 			   &((*prd_pda)->col), &((*prd_pda)->startSector),
    243  1.4.2.1  bouyer 			   &((*prd_pda)->numSector));
    244      1.3   oster 	if (rf_parityLogDebug)
    245      1.3   oster 		printf("[reading %d sectors of parity from region %d]\n",
    246      1.3   oster 		    (int) (*prd_pda)->numSector, regionID);
    247      1.3   oster 	if ((*prd_pda)->next) {
    248      1.3   oster 		(*prd_pda)->next = NULL;
    249      1.3   oster 		printf("set prd_pda->next to NULL\n");
    250      1.3   oster 	}
    251      1.3   oster 	/* initialize DAG parameters */
    252  1.4.2.1  bouyer 	RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
    253  1.4.2.1  bouyer 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
    254  1.4.2.1  bouyer 	(*prd_dag_h)->tracerec = tracerec;
    255      1.3   oster 	prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
    256      1.3   oster 	prd_rdNode->params[0].p = *prd_pda;
    257      1.3   oster 	prd_rdNode->params[1].p = parityBuffer;
    258      1.3   oster 	prd_rdNode->params[2].v = 0;
    259  1.4.2.1  bouyer 	prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    260  1.4.2.1  bouyer 						   0, 0, 0);
    261      1.3   oster 	if (rf_validateDAGDebug)
    262      1.3   oster 		rf_ValidateDAG(*prd_dag_h);
    263      1.3   oster 	/* launch region parity read dag */
    264      1.3   oster 	rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    265      1.3   oster 	    (void *) prd_mcpair);
    266      1.3   oster }
    267      1.3   oster 
    268      1.3   oster static void
    269      1.3   oster WriteRegionParity(
    270      1.3   oster     RF_RegionId_t regionID,
    271      1.3   oster     RF_MCPair_t * pwr_mcpair,
    272      1.3   oster     caddr_t parityBuffer,
    273      1.3   oster     RF_Raid_t * raidPtr,
    274      1.3   oster     RF_DagHeader_t ** pwr_dag_h,
    275      1.3   oster     RF_AllocListElem_t ** pwr_alloclist,
    276      1.3   oster     RF_PhysDiskAddr_t ** pwr_pda)
    277      1.3   oster {
    278      1.3   oster 	/* Initiate the write of region parity to disk. Once initiated, return
    279      1.3   oster 	 * to the calling routine.
    280      1.3   oster 	 *
    281      1.3   oster 	 * NON-BLOCKING */
    282      1.3   oster 
    283  1.4.2.1  bouyer 	RF_AccTraceEntry_t *tracerec;
    284      1.3   oster 	RF_DagNode_t *pwr_wrNode;
    285      1.3   oster 
    286      1.3   oster 	/* create DAG to write region log from disk */
    287      1.3   oster 	rf_MakeAllocList(*pwr_alloclist);
    288  1.4.2.1  bouyer 	*pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer,
    289  1.4.2.1  bouyer 				      rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    290  1.4.2.1  bouyer 				      "Wrp", *pwr_alloclist,
    291  1.4.2.1  bouyer 				      RF_DAG_FLAGS_NONE,
    292  1.4.2.1  bouyer 				      RF_IO_NORMAL_PRIORITY);
    293      1.3   oster 
    294      1.3   oster 	/* create and initialize PDA for region parity */
    295      1.3   oster 	/* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
    296      1.3   oster 	 * *)); */
    297      1.3   oster 	*pwr_pda = rf_AllocPDAList(1);
    298  1.4.2.1  bouyer 	rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row),
    299  1.4.2.1  bouyer 			   &((*pwr_pda)->col), &((*pwr_pda)->startSector),
    300  1.4.2.1  bouyer 			   &((*pwr_pda)->numSector));
    301      1.3   oster 
    302      1.3   oster 	/* initialize DAG parameters */
    303  1.4.2.1  bouyer 	RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
    304  1.4.2.1  bouyer 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
    305  1.4.2.1  bouyer 	(*pwr_dag_h)->tracerec = tracerec;
    306      1.3   oster 	pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
    307      1.3   oster 	pwr_wrNode->params[0].p = *pwr_pda;
    308      1.1   oster /*  pwr_wrNode->params[1] = parityBuffer; */
    309      1.3   oster 	pwr_wrNode->params[2].v = 0;
    310  1.4.2.1  bouyer 	pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
    311  1.4.2.1  bouyer 						   0, 0, 0);
    312      1.1   oster 
    313      1.3   oster 	/* launch the dag to write region parity to disk */
    314      1.3   oster 	rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
    315      1.3   oster 	    (void *) pwr_mcpair);
    316      1.3   oster }
    317      1.3   oster 
    318      1.3   oster static void
    319      1.3   oster FlushLogsToDisk(
    320      1.3   oster     RF_Raid_t * raidPtr,
    321      1.3   oster     RF_ParityLog_t * logList)
    322      1.3   oster {
    323      1.3   oster 	/* Flush a linked list of core logs to the log disk. Logs contain the
    324      1.3   oster 	 * disk location where they should be written.  Logs were written in
    325      1.3   oster 	 * FIFO order and that order must be preserved.
    326      1.3   oster 	 *
    327      1.3   oster 	 * Recommended optimizations: 1) allow multiple flushes to occur
    328      1.3   oster 	 * simultaneously 2) coalesce contiguous flush operations
    329      1.3   oster 	 *
    330      1.3   oster 	 * BLOCKING */
    331      1.3   oster 
    332      1.3   oster 	RF_ParityLog_t *log;
    333      1.3   oster 	RF_RegionId_t regionID;
    334      1.3   oster 	RF_MCPair_t *fwr_mcpair;
    335      1.3   oster 	RF_DagHeader_t *fwr_dag_h;
    336      1.3   oster 	RF_AllocListElem_t *fwr_alloclist;
    337      1.3   oster 	RF_PhysDiskAddr_t *fwr_pda;
    338      1.3   oster 
    339      1.3   oster 	fwr_mcpair = rf_AllocMCPair();
    340      1.3   oster 	RF_LOCK_MUTEX(fwr_mcpair->mutex);
    341      1.3   oster 
    342      1.3   oster 	RF_ASSERT(logList);
    343      1.3   oster 	log = logList;
    344      1.3   oster 	while (log) {
    345      1.3   oster 		regionID = log->regionID;
    346      1.3   oster 
    347      1.3   oster 		/* create and launch a DAG to write the core log */
    348      1.3   oster 		if (rf_parityLogDebug)
    349      1.3   oster 			printf("[initiating write of core log for region %d]\n", regionID);
    350      1.3   oster 		fwr_mcpair->flag = RF_FALSE;
    351  1.4.2.1  bouyer 		WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h,
    352  1.4.2.1  bouyer 			     &fwr_alloclist, &fwr_pda);
    353      1.3   oster 
    354      1.3   oster 		/* wait for the DAG to complete */
    355      1.3   oster 		while (!fwr_mcpair->flag)
    356      1.3   oster 			RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
    357      1.3   oster 		if (fwr_dag_h->status != rf_enable) {
    358      1.3   oster 			RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
    359      1.3   oster 			RF_ASSERT(0);
    360      1.3   oster 		}
    361      1.3   oster 		/* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    362      1.3   oster 		rf_FreePhysDiskAddr(fwr_pda);
    363      1.3   oster 		rf_FreeDAG(fwr_dag_h);
    364      1.3   oster 		rf_FreeAllocList(fwr_alloclist);
    365      1.3   oster 
    366      1.3   oster 		log = log->next;
    367      1.3   oster 	}
    368      1.3   oster 	RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
    369      1.3   oster 	rf_FreeMCPair(fwr_mcpair);
    370      1.3   oster 	rf_ReleaseParityLogs(raidPtr, logList);
    371      1.3   oster }
    372      1.3   oster 
    373      1.3   oster static void
    374      1.3   oster ReintegrateRegion(
    375      1.3   oster     RF_Raid_t * raidPtr,
    376      1.3   oster     RF_RegionId_t regionID,
    377      1.3   oster     RF_ParityLog_t * coreLog)
    378      1.3   oster {
    379      1.3   oster 	RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
    380      1.3   oster 	RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
    381      1.3   oster 	RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
    382      1.3   oster 	RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
    383      1.3   oster 	caddr_t parityBuffer, regionBuffer = NULL;
    384      1.3   oster 
    385  1.4.2.1  bouyer 	/* Reintegrate a region (regionID).
    386  1.4.2.1  bouyer 	 *
    387  1.4.2.1  bouyer 	 * 1. acquire region and parity buffers
    388  1.4.2.1  bouyer 	 * 2. read log from disk
    389  1.4.2.1  bouyer 	 * 3. read parity from disk
    390  1.4.2.1  bouyer 	 * 4. apply log to parity
    391  1.4.2.1  bouyer 	 * 5. apply core log to parity
    392  1.4.2.1  bouyer 	 * 6. write new parity to disk
    393      1.3   oster 	 *
    394      1.3   oster 	 * BLOCKING */
    395      1.3   oster 
    396      1.3   oster 	if (rf_parityLogDebug)
    397      1.3   oster 		printf("[reintegrating region %d]\n", regionID);
    398      1.3   oster 
    399      1.3   oster 	/* initiate read of region parity */
    400      1.3   oster 	if (rf_parityLogDebug)
    401  1.4.2.1  bouyer 		printf("[initiating read of parity for region %d]\n",regionID);
    402      1.3   oster 	parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
    403      1.3   oster 	prd_mcpair = rf_AllocMCPair();
    404      1.3   oster 	RF_LOCK_MUTEX(prd_mcpair->mutex);
    405      1.3   oster 	prd_mcpair->flag = RF_FALSE;
    406  1.4.2.1  bouyer 	ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr,
    407  1.4.2.1  bouyer 			 &prd_dag_h, &prd_alloclist, &prd_pda);
    408      1.3   oster 
    409      1.3   oster 	/* if region log nonempty, initiate read */
    410      1.3   oster 	if (raidPtr->regionInfo[regionID].diskCount > 0) {
    411      1.3   oster 		if (rf_parityLogDebug)
    412  1.4.2.1  bouyer 			printf("[initiating read of disk log for region %d]\n",
    413  1.4.2.1  bouyer 			       regionID);
    414      1.3   oster 		regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
    415      1.3   oster 		rrd_mcpair = rf_AllocMCPair();
    416      1.3   oster 		RF_LOCK_MUTEX(rrd_mcpair->mutex);
    417      1.3   oster 		rrd_mcpair->flag = RF_FALSE;
    418  1.4.2.1  bouyer 		ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr,
    419  1.4.2.1  bouyer 			      &rrd_dag_h, &rrd_alloclist, &rrd_pda);
    420      1.3   oster 	}
    421      1.3   oster 	/* wait on read of region parity to complete */
    422      1.3   oster 	while (!prd_mcpair->flag) {
    423      1.3   oster 		RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
    424      1.3   oster 	}
    425      1.3   oster 	RF_UNLOCK_MUTEX(prd_mcpair->mutex);
    426      1.3   oster 	if (prd_dag_h->status != rf_enable) {
    427      1.3   oster 		RF_ERRORMSG("Unable to read parity from disk\n");
    428      1.3   oster 		/* add code to fail the parity disk */
    429      1.3   oster 		RF_ASSERT(0);
    430      1.3   oster 	}
    431      1.3   oster 	/* apply core log to parity */
    432      1.3   oster 	/* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
    433      1.3   oster 
    434      1.3   oster 	if (raidPtr->regionInfo[regionID].diskCount > 0) {
    435      1.3   oster 		/* wait on read of region log to complete */
    436      1.3   oster 		while (!rrd_mcpair->flag)
    437      1.3   oster 			RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
    438      1.3   oster 		RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
    439      1.3   oster 		if (rrd_dag_h->status != rf_enable) {
    440      1.3   oster 			RF_ERRORMSG("Unable to read region log from disk\n");
    441      1.3   oster 			/* add code to fail the log disk */
    442      1.3   oster 			RF_ASSERT(0);
    443      1.3   oster 		}
    444      1.3   oster 		/* apply region log to parity */
    445      1.3   oster 		/* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
    446      1.3   oster 		/* release resources associated with region log */
    447      1.3   oster 		/* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
    448      1.3   oster 		rf_FreePhysDiskAddr(rrd_pda);
    449      1.3   oster 		rf_FreeDAG(rrd_dag_h);
    450      1.3   oster 		rf_FreeAllocList(rrd_alloclist);
    451      1.3   oster 		rf_FreeMCPair(rrd_mcpair);
    452      1.3   oster 		ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
    453      1.3   oster 	}
    454      1.3   oster 	/* write reintegrated parity to disk */
    455      1.3   oster 	if (rf_parityLogDebug)
    456  1.4.2.1  bouyer 		printf("[initiating write of parity for region %d]\n",
    457  1.4.2.1  bouyer 		       regionID);
    458      1.3   oster 	pwr_mcpair = rf_AllocMCPair();
    459      1.3   oster 	RF_LOCK_MUTEX(pwr_mcpair->mutex);
    460      1.3   oster 	pwr_mcpair->flag = RF_FALSE;
    461  1.4.2.1  bouyer 	WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr,
    462  1.4.2.1  bouyer 			  &pwr_dag_h, &pwr_alloclist, &pwr_pda);
    463      1.3   oster 	while (!pwr_mcpair->flag)
    464      1.3   oster 		RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
    465      1.3   oster 	RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
    466      1.3   oster 	if (pwr_dag_h->status != rf_enable) {
    467      1.3   oster 		RF_ERRORMSG("Unable to write parity to disk\n");
    468      1.3   oster 		/* add code to fail the parity disk */
    469      1.3   oster 		RF_ASSERT(0);
    470      1.3   oster 	}
    471      1.3   oster 	/* release resources associated with read of old parity */
    472      1.3   oster 	/* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
    473      1.3   oster 	rf_FreePhysDiskAddr(prd_pda);
    474      1.3   oster 	rf_FreeDAG(prd_dag_h);
    475      1.3   oster 	rf_FreeAllocList(prd_alloclist);
    476      1.3   oster 	rf_FreeMCPair(prd_mcpair);
    477      1.3   oster 
    478      1.3   oster 	/* release resources associated with write of new parity */
    479      1.3   oster 	ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
    480      1.3   oster 	/* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    481      1.3   oster 	rf_FreePhysDiskAddr(pwr_pda);
    482      1.3   oster 	rf_FreeDAG(pwr_dag_h);
    483      1.3   oster 	rf_FreeAllocList(pwr_alloclist);
    484      1.3   oster 	rf_FreeMCPair(pwr_mcpair);
    485      1.3   oster 
    486      1.3   oster 	if (rf_parityLogDebug)
    487      1.3   oster 		printf("[finished reintegrating region %d]\n", regionID);
    488      1.3   oster }
    489      1.3   oster 
    490      1.3   oster 
    491      1.3   oster 
    492      1.3   oster static void
    493      1.3   oster ReintegrateLogs(
    494      1.3   oster     RF_Raid_t * raidPtr,
    495      1.3   oster     RF_ParityLog_t * logList)
    496      1.3   oster {
    497      1.3   oster 	RF_ParityLog_t *log, *freeLogList = NULL;
    498      1.3   oster 	RF_ParityLogData_t *logData, *logDataList;
    499      1.3   oster 	RF_RegionId_t regionID;
    500      1.3   oster 
    501      1.3   oster 	RF_ASSERT(logList);
    502      1.3   oster 	while (logList) {
    503      1.3   oster 		log = logList;
    504      1.3   oster 		logList = logList->next;
    505      1.3   oster 		log->next = NULL;
    506      1.3   oster 		regionID = log->regionID;
    507      1.3   oster 		ReintegrateRegion(raidPtr, regionID, log);
    508      1.3   oster 		log->numRecords = 0;
    509      1.3   oster 
    510      1.3   oster 		/* remove all items which are blocked on reintegration of this
    511      1.3   oster 		 * region */
    512      1.3   oster 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    513  1.4.2.1  bouyer 		logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID,
    514  1.4.2.1  bouyer 			   &raidPtr->parityLogDiskQueue.reintBlockHead,
    515  1.4.2.1  bouyer 			   &raidPtr->parityLogDiskQueue.reintBlockTail,
    516  1.4.2.1  bouyer 							   RF_TRUE);
    517      1.3   oster 		logDataList = logData;
    518      1.3   oster 		while (logData) {
    519  1.4.2.1  bouyer 			logData->next = rf_SearchAndDequeueParityLogData(
    520  1.4.2.1  bouyer 					 raidPtr, regionID,
    521  1.4.2.1  bouyer 					 &raidPtr->parityLogDiskQueue.reintBlockHead,
    522  1.4.2.1  bouyer 					 &raidPtr->parityLogDiskQueue.reintBlockTail,
    523  1.4.2.1  bouyer 					 RF_TRUE);
    524      1.3   oster 			logData = logData->next;
    525      1.3   oster 		}
    526      1.3   oster 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    527      1.3   oster 
    528      1.3   oster 		/* process blocked log data and clear reintInProgress flag for
    529      1.3   oster 		 * this region */
    530      1.3   oster 		if (logDataList)
    531      1.3   oster 			rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
    532      1.3   oster 		else {
    533      1.3   oster 			/* Enable flushing for this region.  Holding both
    534      1.3   oster 			 * locks provides a synchronization barrier with
    535      1.3   oster 			 * DumpParityLogToDisk */
    536      1.3   oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    537      1.3   oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    538      1.3   oster 			RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    539      1.3   oster 			raidPtr->regionInfo[regionID].diskCount = 0;
    540      1.3   oster 			raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    541      1.3   oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    542      1.3   oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
    543      1.3   oster 											 * enabled */
    544      1.3   oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    545      1.3   oster 		}
    546      1.3   oster 		/* if log wasn't used, attach it to the list of logs to be
    547      1.3   oster 		 * returned */
    548      1.3   oster 		if (log) {
    549      1.3   oster 			log->next = freeLogList;
    550      1.3   oster 			freeLogList = log;
    551      1.3   oster 		}
    552      1.3   oster 	}
    553      1.3   oster 	if (freeLogList)
    554      1.3   oster 		rf_ReleaseParityLogs(raidPtr, freeLogList);
    555      1.3   oster }
    556      1.3   oster 
    557      1.3   oster int
    558      1.3   oster rf_ShutdownLogging(RF_Raid_t * raidPtr)
    559      1.3   oster {
    560      1.3   oster 	/* Shut down parity logging for raidPtr.  When rf_forceParityLogReint
    561      1.3   oster 	 * is set, each region in turn 1) has logging disabled and its core
                         	 * log detached, 2) is reintegrated if it had a core log or a
                         	 * non-zero diskCount, and 3) has the detached core log released.
                         	 * When rf_forceParityLogReint is clear, no region is touched and
                         	 * only the debug messages (if enabled) are printed.
                         	 *
                         	 * Always returns 0. */
    562      1.3   oster 
    563      1.3   oster 	RF_SectorCount_t diskCount;
    564      1.3   oster 	RF_RegionId_t regionID;
    565      1.3   oster 	RF_ParityLog_t *log;
    566      1.3   oster 
    567      1.3   oster 	if (rf_parityLogDebug)
    568      1.3   oster 		printf("[shutting down parity logging]\n");
    569      1.3   oster 	/* Since parity log maps are volatile, we must reintegrate all
    570      1.3   oster 	 * regions. */
    571      1.3   oster 	if (rf_forceParityLogReint) {
    572      1.3   oster 		for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
                         			/* Under the region mutex: turn logging off, take
                         			 * ownership of the region's core log (leaving the
                         			 * region with none) and sample its diskCount. */
    573      1.3   oster 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    574  1.4.2.1  bouyer 			raidPtr->regionInfo[regionID].loggingEnabled =
    575  1.4.2.1  bouyer 				RF_FALSE;
    576      1.3   oster 			log = raidPtr->regionInfo[regionID].coreLog;
    577      1.3   oster 			raidPtr->regionInfo[regionID].coreLog = NULL;
    578      1.3   oster 			diskCount = raidPtr->regionInfo[regionID].diskCount;
    579      1.3   oster 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
                         			/* The region mutex is no longer held from here on.
                         			 * Reintegration is skipped only when the region had
                         			 * neither a core log nor a non-zero diskCount. */
    580      1.3   oster 			if (diskCount > 0 || log != NULL)
    581      1.3   oster 				ReintegrateRegion(raidPtr, regionID, log);
                         			/* Hand the detached core log back. */
    582      1.3   oster 			if (log != NULL)
    583      1.3   oster 				rf_ReleaseParityLogs(raidPtr, log);
    584      1.3   oster 		}
    585      1.3   oster 	}
    586      1.3   oster 	if (rf_parityLogDebug) {
    587      1.3   oster 		printf("[parity logging disabled]\n");
    588      1.3   oster 		printf("[should be done!]\n");
    589      1.3   oster 	}
    590      1.3   oster 	return (0);
    591      1.3   oster }
    592      1.3   oster 
    593      1.3   oster int
    594      1.3   oster rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr)
    595      1.3   oster {
    596      1.3   oster 	RF_ParityLog_t *reintQueue, *flushQueue;
    597      1.3   oster 	int     workNeeded, done = RF_FALSE;
    598  1.4.2.1  bouyer 	int s;
    599      1.3   oster 
    600      1.3   oster 	/* Main program for parity logging disk thread.  This routine waits
    601      1.3   oster 	 * for work to appear in either the flush or reintegration queues and
    602      1.3   oster 	 * is responsible for flushing core logs to the log disk as well as
    603      1.3   oster 	 * reintegrating parity regions.
    604      1.3   oster 	 *
                         	 * The loop ends once RF_PLOG_TERMINATE is seen in threadState at a
                         	 * point where both queues were found empty; the routine then calls
                         	 * rf_ShutdownLogging(), sets RF_PLOG_SHUTDOWN, signals the queue's
                         	 * condition and exits the thread.  Although declared int, the
                         	 * routine has no return statement: it ends in kthread_exit(0).
                         	 *
    605      1.3   oster 	 * BLOCKING */
    606      1.3   oster 
                         	/* The level saved in s is restored by splx(s) just before the
                         	 * thread exits. */
    607  1.4.2.1  bouyer 	s = splbio();
    608  1.4.2.1  bouyer 
    609      1.3   oster 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    610      1.3   oster 
    611      1.3   oster 	/*
    612      1.3   oster          * Inform our creator that we're running. Don't bother doing the
    613      1.3   oster          * mutex lock/unlock dance- we locked above, and we'll unlock
    614      1.3   oster          * below with nothing to do, yet.
    615      1.3   oster          */
    616      1.3   oster 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
    617      1.3   oster 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    618      1.3   oster 
    619      1.3   oster 	/* empty the work queues: take private ownership of both lists and
                         	 * leave the shared queue heads NULL */
    620      1.3   oster 	flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    621      1.3   oster 	raidPtr->parityLogDiskQueue.flushQueue = NULL;
    622      1.3   oster 	reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    623      1.3   oster 	raidPtr->parityLogDiskQueue.reintQueue = NULL;
    624      1.3   oster 	workNeeded = (flushQueue || reintQueue);
    625      1.3   oster 
    626      1.3   oster 	while (!done) {
    627      1.3   oster 		while (workNeeded) {
    628      1.3   oster 			/* First, flush all logs in the flush queue, freeing
    629      1.3   oster 			 * buffers.  Second, reintegrate all regions which are
    630      1.3   oster 			 * reported as full.  Third, append queued log data
    631      1.3   oster 			 * until blocked.
    632      1.3   oster 			 *
    633      1.3   oster 			 * Note: Incoming appends (ParityLogAppend) can block on
    634      1.3   oster 			 * either 1. empty buffer pool 2. region under
    635      1.3   oster 			 * reintegration.  To preserve a global FIFO ordering of
    636      1.3   oster 			 * appends, buffers are not released to the world
    637      1.3   oster 			 * until those appends blocked on buffers are removed
    638      1.3   oster 			 * from the append queue.  Similarly, regions which
    639      1.3   oster 			 * are reintegrated are not opened for general use
    640      1.3   oster 			 * until the append queue has been emptied. */
    641      1.3   oster 
                         			/* The queue mutex is dropped while the privately
                         			 * held lists are processed. */
    642      1.3   oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    643      1.3   oster 
    644      1.3   oster 			/* empty flushQueue, using free'd log buffers to
    645      1.3   oster 			 * process bufTail */
    646      1.3   oster 			if (flushQueue)
    647  1.4.2.1  bouyer 			       FlushLogsToDisk(raidPtr, flushQueue);
    648      1.3   oster 
    649      1.3   oster 			/* empty reintQueue, flushing from reintTail as we go */
    650      1.3   oster 			if (reintQueue)
    651      1.3   oster 				ReintegrateLogs(raidPtr, reintQueue);
    652      1.3   oster 
                         			/* Re-take the queue mutex and pick up whatever was
                         			 * queued in the meantime; the inner loop repeats
                         			 * until both queues come back empty. */
    653      1.3   oster 			RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    654      1.3   oster 			flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    655      1.3   oster 			raidPtr->parityLogDiskQueue.flushQueue = NULL;
    656      1.3   oster 			reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    657      1.3   oster 			raidPtr->parityLogDiskQueue.reintQueue = NULL;
    658      1.3   oster 			workNeeded = (flushQueue || reintQueue);
    659      1.3   oster 		}
    660      1.3   oster 		/* no work is needed at this point */
    661      1.3   oster 		if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
    662      1.3   oster 			/* shutdown parity logging 1. disable parity logging
    663      1.3   oster 			 * in all regions 2. reintegrate all regions.
                         			 *
                         			 * The queue mutex is released before calling
                         			 * rf_ShutdownLogging() and is taken again after the
                         			 * outer loop, where RF_PLOG_SHUTDOWN is set. */
    664      1.3   oster 			done = RF_TRUE;	/* thread disabled, no work needed */
    665      1.3   oster 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    666      1.3   oster 			rf_ShutdownLogging(raidPtr);
    667      1.3   oster 		}
    668      1.3   oster 		if (!done) {
    669      1.3   oster 			/* thread enabled, no work needed, so sleep */
    670      1.3   oster 			if (rf_parityLogDebug)
    671      1.3   oster 				printf("[parity logging disk manager sleeping]\n");
                         			/* NOTE(review): presumably RF_WAIT_COND drops the
                         			 * queue mutex while asleep and holds it again on
                         			 * return, so the queue reads below are protected --
                         			 * confirm against the macro definition. */
    672  1.4.2.1  bouyer 			RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
    673  1.4.2.1  bouyer 				     raidPtr->parityLogDiskQueue.mutex);
    674      1.3   oster 			if (rf_parityLogDebug)
    675      1.3   oster 				printf("[parity logging disk manager just woke up]\n");
    676      1.3   oster 			flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    677      1.3   oster 			raidPtr->parityLogDiskQueue.flushQueue = NULL;
    678      1.3   oster 			reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    679      1.3   oster 			raidPtr->parityLogDiskQueue.reintQueue = NULL;
    680      1.3   oster 			workNeeded = (flushQueue || reintQueue);
    681      1.3   oster 		}
    682      1.3   oster 	}
    683      1.3   oster 	/*
    684      1.3   oster          * Announce that we're done: set RF_PLOG_SHUTDOWN under the queue
                                  * mutex, then signal the queue's condition after unlocking.
    685      1.3   oster          */
    686      1.3   oster 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    687      1.3   oster 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
    688      1.3   oster 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    689      1.3   oster 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    690  1.4.2.1  bouyer 
    691  1.4.2.1  bouyer 	splx(s);
    692  1.4.2.1  bouyer 
    693      1.3   oster 	/*
    694      1.3   oster          * In the NetBSD kernel, the thread must exit; returning would
    695      1.3   oster          * cause the proc trampoline to attempt to return to userspace.
    696      1.3   oster          */
    697      1.3   oster 	kthread_exit(0);	/* does not return */
    698      1.1   oster }
    699      1.3   oster #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
    700