Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylogDiskMgr.c revision 1.2
      1  1.2  oster /*	$NetBSD: rf_paritylogDiskMgr.c,v 1.2 1999/01/26 02:33:59 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Author: William V. Courtright II
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster /* Code for flushing and reintegration operations related to parity logging.
     29  1.1  oster  *
     30  1.1  oster  */
     31  1.1  oster 
     32  1.1  oster #include "rf_archs.h"
     33  1.1  oster 
     34  1.1  oster #if RF_INCLUDE_PARITYLOGGING > 0
     35  1.1  oster 
     36  1.1  oster #include "rf_types.h"
     37  1.1  oster #include "rf_threadstuff.h"
     38  1.1  oster #include "rf_mcpair.h"
     39  1.1  oster #include "rf_raid.h"
     40  1.1  oster #include "rf_dag.h"
     41  1.1  oster #include "rf_dagfuncs.h"
     42  1.1  oster #include "rf_desc.h"
     43  1.1  oster #include "rf_layout.h"
     44  1.1  oster #include "rf_diskqueue.h"
     45  1.1  oster #include "rf_paritylog.h"
     46  1.1  oster #include "rf_general.h"
     47  1.1  oster #include "rf_threadid.h"
     48  1.1  oster #include "rf_etimer.h"
     49  1.1  oster #include "rf_paritylogging.h"
     50  1.1  oster #include "rf_engine.h"
     51  1.1  oster #include "rf_dagutils.h"
     52  1.1  oster #include "rf_map.h"
     53  1.1  oster #include "rf_parityscan.h"
     54  1.1  oster #include "rf_sys.h"
     55  1.1  oster 
     56  1.1  oster #include "rf_paritylogDiskMgr.h"
     57  1.1  oster 
     58  1.1  oster static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
     59  1.1  oster 
     60  1.1  oster static caddr_t AcquireReintBuffer(pool)
     61  1.1  oster   RF_RegionBufferQueue_t  *pool;
     62  1.1  oster {
     63  1.1  oster   caddr_t bufPtr = NULL;
     64  1.1  oster 
     65  1.1  oster   /* Return a region buffer from the free list (pool).
     66  1.1  oster      If the free list is empty, WAIT.
     67  1.1  oster      BLOCKING */
     68  1.1  oster 
     69  1.1  oster   RF_LOCK_MUTEX(pool->mutex);
     70  1.1  oster   if (pool->availableBuffers > 0) {
     71  1.1  oster     bufPtr = pool->buffers[pool->availBuffersIndex];
     72  1.1  oster     pool->availableBuffers--;
     73  1.1  oster     pool->availBuffersIndex++;
     74  1.1  oster     if (pool->availBuffersIndex == pool->totalBuffers)
     75  1.1  oster       pool->availBuffersIndex = 0;
     76  1.1  oster     RF_UNLOCK_MUTEX(pool->mutex);
     77  1.1  oster   }
     78  1.1  oster   else {
     79  1.1  oster     RF_PANIC(); /* should never happen in currect config, single reint */
     80  1.1  oster     RF_WAIT_COND(pool->cond, pool->mutex);
     81  1.1  oster   }
     82  1.1  oster   return(bufPtr);
     83  1.1  oster }
     84  1.1  oster 
     85  1.1  oster static void ReleaseReintBuffer(
     86  1.1  oster   RF_RegionBufferQueue_t  *pool,
     87  1.1  oster   caddr_t                  bufPtr)
     88  1.1  oster {
     89  1.1  oster   /* Insert a region buffer (bufPtr) into the free list (pool).
     90  1.1  oster      NON-BLOCKING */
     91  1.1  oster 
     92  1.1  oster   RF_LOCK_MUTEX(pool->mutex);
     93  1.1  oster   pool->availableBuffers++;
     94  1.1  oster   pool->buffers[pool->emptyBuffersIndex] = bufPtr;
     95  1.1  oster   pool->emptyBuffersIndex++;
     96  1.1  oster   if (pool->emptyBuffersIndex == pool->totalBuffers)
     97  1.1  oster     pool->emptyBuffersIndex = 0;
     98  1.1  oster   RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
     99  1.1  oster   RF_UNLOCK_MUTEX(pool->mutex);
    100  1.1  oster   RF_SIGNAL_COND(pool->cond);
    101  1.1  oster }
    102  1.1  oster 
    103  1.1  oster 
    104  1.1  oster 
    105  1.1  oster static void ReadRegionLog(
    106  1.1  oster   RF_RegionId_t         regionID,
    107  1.1  oster   RF_MCPair_t          *rrd_mcpair,
    108  1.1  oster   caddr_t               regionBuffer,
    109  1.1  oster   RF_Raid_t            *raidPtr,
    110  1.1  oster   RF_DagHeader_t      **rrd_dag_h,
    111  1.1  oster   RF_AllocListElem_t  **rrd_alloclist,
    112  1.1  oster   RF_PhysDiskAddr_t   **rrd_pda)
    113  1.1  oster {
    114  1.1  oster   /* Initiate the read a region log from disk.  Once initiated, return
    115  1.1  oster      to the calling routine.
    116  1.1  oster 
    117  1.1  oster      NON-BLOCKING
    118  1.1  oster    */
    119  1.1  oster 
    120  1.1  oster   RF_AccTraceEntry_t tracerec;
    121  1.1  oster   RF_DagNode_t *rrd_rdNode;
    122  1.1  oster 
    123  1.1  oster   /* create DAG to read region log from disk */
    124  1.1  oster   rf_MakeAllocList(*rrd_alloclist);
    125  1.1  oster   *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    126  1.1  oster 			     "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    127  1.1  oster 
    128  1.1  oster   /* create and initialize PDA for the core log */
    129  1.1  oster   /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
    130  1.1  oster   *rrd_pda = rf_AllocPDAList(1);
    131  1.1  oster   rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
    132  1.1  oster   (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
    133  1.1  oster 
    134  1.1  oster   if ((*rrd_pda)->next) {
    135  1.1  oster     (*rrd_pda)->next = NULL;
    136  1.1  oster     printf("set rrd_pda->next to NULL\n");
    137  1.1  oster   }
    138  1.1  oster 
    139  1.1  oster   /* initialize DAG parameters */
    140  1.1  oster   bzero((char *)&tracerec,sizeof(tracerec));
    141  1.1  oster   (*rrd_dag_h)->tracerec = &tracerec;
    142  1.1  oster   rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
    143  1.1  oster   rrd_rdNode->params[0].p = *rrd_pda;
    144  1.1  oster /*  rrd_rdNode->params[1] = regionBuffer; */
    145  1.1  oster   rrd_rdNode->params[2].v = 0;
    146  1.1  oster   rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    147  1.1  oster 
    148  1.1  oster   /* launch region log read dag */
    149  1.1  oster   rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    150  1.1  oster 		 (void *) rrd_mcpair);
    151  1.1  oster }
    152  1.1  oster 
    153  1.1  oster 
    154  1.1  oster 
    155  1.1  oster static void WriteCoreLog(
    156  1.1  oster   RF_ParityLog_t       *log,
    157  1.1  oster   RF_MCPair_t          *fwr_mcpair,
    158  1.1  oster   RF_Raid_t            *raidPtr,
    159  1.1  oster   RF_DagHeader_t      **fwr_dag_h,
    160  1.1  oster   RF_AllocListElem_t  **fwr_alloclist,
    161  1.1  oster   RF_PhysDiskAddr_t   **fwr_pda)
    162  1.1  oster {
    163  1.1  oster   RF_RegionId_t regionID = log->regionID;
    164  1.1  oster   RF_AccTraceEntry_t tracerec;
    165  1.1  oster   RF_SectorNum_t regionOffset;
    166  1.1  oster   RF_DagNode_t *fwr_wrNode;
    167  1.1  oster 
    168  1.1  oster   /* Initiate the write of a core log to a region log disk.
    169  1.1  oster      Once initiated, return to the calling routine.
    170  1.1  oster 
    171  1.1  oster      NON-BLOCKING
    172  1.1  oster    */
    173  1.1  oster 
    174  1.1  oster   /* create DAG to write a core log to a region log disk */
    175  1.1  oster   rf_MakeAllocList(*fwr_alloclist);
    176  1.1  oster   *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    177  1.1  oster 			     "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    178  1.1  oster 
    179  1.1  oster   /* create and initialize PDA for the region log */
    180  1.1  oster   /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
    181  1.1  oster   *fwr_pda = rf_AllocPDAList(1);
    182  1.1  oster   regionOffset = log->diskOffset;
    183  1.1  oster   rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
    184  1.1  oster   (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
    185  1.1  oster 
    186  1.1  oster   /* initialize DAG parameters */
    187  1.1  oster   bzero((char *)&tracerec,sizeof(tracerec));
    188  1.1  oster   (*fwr_dag_h)->tracerec = &tracerec;
    189  1.1  oster   fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
    190  1.1  oster   fwr_wrNode->params[0].p = *fwr_pda;
    191  1.1  oster /*  fwr_wrNode->params[1] = log->bufPtr; */
    192  1.1  oster   fwr_wrNode->params[2].v = 0;
    193  1.1  oster   fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    194  1.1  oster 
    195  1.1  oster   /* launch the dag to write the core log to disk */
    196  1.1  oster   rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
    197  1.1  oster 		 (void *) fwr_mcpair);
    198  1.1  oster }
    199  1.1  oster 
    200  1.1  oster 
    201  1.1  oster static void ReadRegionParity(
    202  1.1  oster   RF_RegionId_t         regionID,
    203  1.1  oster   RF_MCPair_t          *prd_mcpair,
    204  1.1  oster   caddr_t               parityBuffer,
    205  1.1  oster   RF_Raid_t            *raidPtr,
    206  1.1  oster   RF_DagHeader_t      **prd_dag_h,
    207  1.1  oster   RF_AllocListElem_t  **prd_alloclist,
    208  1.1  oster   RF_PhysDiskAddr_t   **prd_pda)
    209  1.1  oster {
    210  1.1  oster   /* Initiate the read region parity from disk.
    211  1.1  oster      Once initiated, return to the calling routine.
    212  1.1  oster 
    213  1.1  oster      NON-BLOCKING
    214  1.1  oster    */
    215  1.1  oster 
    216  1.1  oster   RF_AccTraceEntry_t tracerec;
    217  1.1  oster   RF_DagNode_t *prd_rdNode;
    218  1.1  oster 
    219  1.1  oster   /* create DAG to read region parity from disk */
    220  1.1  oster   rf_MakeAllocList(*prd_alloclist);
    221  1.1  oster   *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    222  1.1  oster 			     "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    223  1.1  oster 
    224  1.1  oster   /* create and initialize PDA for region parity */
    225  1.1  oster   /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
    226  1.1  oster   *prd_pda = rf_AllocPDAList(1);
    227  1.1  oster   rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
    228  1.1  oster   if (rf_parityLogDebug)
    229  1.1  oster     printf("[reading %d sectors of parity from region %d]\n",
    230  1.1  oster 	   (int)(*prd_pda)->numSector, regionID);
    231  1.1  oster   if ((*prd_pda)->next) {
    232  1.1  oster     (*prd_pda)->next = NULL;
    233  1.1  oster     printf("set prd_pda->next to NULL\n");
    234  1.1  oster   }
    235  1.1  oster 
    236  1.1  oster   /* initialize DAG parameters */
    237  1.1  oster   bzero((char *)&tracerec,sizeof(tracerec));
    238  1.1  oster   (*prd_dag_h)->tracerec = &tracerec;
    239  1.1  oster   prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
    240  1.1  oster   prd_rdNode->params[0].p = *prd_pda;
    241  1.1  oster   prd_rdNode->params[1].p = parityBuffer;
    242  1.1  oster   prd_rdNode->params[2].v = 0;
    243  1.1  oster   prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    244  1.1  oster   if (rf_validateDAGDebug)
    245  1.1  oster     rf_ValidateDAG(*prd_dag_h);
    246  1.1  oster   /* launch region parity read dag */
    247  1.1  oster   rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
    248  1.1  oster 		 (void *) prd_mcpair);
    249  1.1  oster }
    250  1.1  oster 
    251  1.1  oster static void WriteRegionParity(
    252  1.1  oster   RF_RegionId_t         regionID,
    253  1.1  oster   RF_MCPair_t          *pwr_mcpair,
    254  1.1  oster   caddr_t               parityBuffer,
    255  1.1  oster   RF_Raid_t            *raidPtr,
    256  1.1  oster   RF_DagHeader_t      **pwr_dag_h,
    257  1.1  oster   RF_AllocListElem_t  **pwr_alloclist,
    258  1.1  oster   RF_PhysDiskAddr_t   **pwr_pda)
    259  1.1  oster {
    260  1.1  oster   /* Initiate the write of region parity to disk.
    261  1.1  oster      Once initiated, return to the calling routine.
    262  1.1  oster 
    263  1.1  oster      NON-BLOCKING
    264  1.1  oster    */
    265  1.1  oster 
    266  1.1  oster   RF_AccTraceEntry_t tracerec;
    267  1.1  oster   RF_DagNode_t *pwr_wrNode;
    268  1.1  oster 
    269  1.1  oster   /* create DAG to write region log from disk */
    270  1.1  oster   rf_MakeAllocList(*pwr_alloclist);
    271  1.1  oster   *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    272  1.1  oster 			     "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    273  1.1  oster 
    274  1.1  oster   /* create and initialize PDA for region parity */
    275  1.1  oster   /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
    276  1.1  oster   *pwr_pda = rf_AllocPDAList(1);
    277  1.1  oster   rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
    278  1.1  oster 
    279  1.1  oster   /* initialize DAG parameters */
    280  1.1  oster   bzero((char *)&tracerec,sizeof(tracerec));
    281  1.1  oster   (*pwr_dag_h)->tracerec = &tracerec;
    282  1.1  oster   pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
    283  1.1  oster   pwr_wrNode->params[0].p = *pwr_pda;
    284  1.1  oster /*  pwr_wrNode->params[1] = parityBuffer; */
    285  1.1  oster   pwr_wrNode->params[2].v = 0;
    286  1.1  oster   pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    287  1.1  oster 
    288  1.1  oster   /* launch the dag to write region parity to disk */
    289  1.1  oster   rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    290  1.1  oster 			      (void *) pwr_mcpair);
    291  1.1  oster }
    292  1.1  oster 
    293  1.1  oster static void FlushLogsToDisk(
    294  1.1  oster   RF_Raid_t       *raidPtr,
    295  1.1  oster   RF_ParityLog_t  *logList)
    296  1.1  oster {
    297  1.1  oster   /* Flush a linked list of core logs to the log disk.
    298  1.1  oster      Logs contain the disk location where they should be
    299  1.1  oster      written.  Logs were written in FIFO order and that
    300  1.1  oster      order must be preserved.
    301  1.1  oster 
    302  1.1  oster      Recommended optimizations:
    303  1.1  oster        1) allow multiple flushes to occur simultaneously
    304  1.1  oster        2) coalesce contiguous flush operations
    305  1.1  oster 
    306  1.1  oster      BLOCKING
    307  1.1  oster      */
    308  1.1  oster 
    309  1.1  oster   RF_ParityLog_t *log;
    310  1.1  oster   RF_RegionId_t regionID;
    311  1.1  oster   RF_MCPair_t *fwr_mcpair;
    312  1.1  oster   RF_DagHeader_t *fwr_dag_h;
    313  1.1  oster   RF_AllocListElem_t *fwr_alloclist;
    314  1.1  oster   RF_PhysDiskAddr_t *fwr_pda;
    315  1.1  oster 
    316  1.1  oster   fwr_mcpair = rf_AllocMCPair();
    317  1.1  oster   RF_LOCK_MUTEX(fwr_mcpair->mutex);
    318  1.1  oster 
    319  1.1  oster   RF_ASSERT(logList);
    320  1.1  oster   log = logList;
    321  1.1  oster   while (log)
    322  1.1  oster     {
    323  1.1  oster       regionID = log->regionID;
    324  1.1  oster 
    325  1.1  oster       /* create and launch a DAG to write the core log */
    326  1.1  oster       if (rf_parityLogDebug)
    327  1.1  oster 	printf("[initiating write of core log for region %d]\n", regionID);
    328  1.1  oster       fwr_mcpair->flag = RF_FALSE;
    329  1.1  oster       WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
    330  1.1  oster 
    331  1.1  oster       /* wait for the DAG to complete */
    332  1.1  oster       while (!fwr_mcpair->flag)
    333  1.1  oster 	RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
    334  1.1  oster       if (fwr_dag_h->status != rf_enable)
    335  1.1  oster 	{
    336  1.1  oster 	  RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
    337  1.1  oster 	  RF_ASSERT(0);
    338  1.1  oster 	}
    339  1.1  oster 
    340  1.1  oster       /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    341  1.1  oster       rf_FreePhysDiskAddr(fwr_pda);
    342  1.1  oster       rf_FreeDAG(fwr_dag_h);
    343  1.1  oster       rf_FreeAllocList(fwr_alloclist);
    344  1.1  oster 
    345  1.1  oster       log = log->next;
    346  1.1  oster     }
    347  1.1  oster   RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
    348  1.1  oster   rf_FreeMCPair(fwr_mcpair);
    349  1.1  oster   rf_ReleaseParityLogs(raidPtr, logList);
    350  1.1  oster }
    351  1.1  oster 
    352  1.1  oster static void ReintegrateRegion(
    353  1.1  oster   RF_Raid_t       *raidPtr,
    354  1.1  oster   RF_RegionId_t    regionID,
    355  1.1  oster   RF_ParityLog_t  *coreLog)
    356  1.1  oster {
    357  1.1  oster   RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair;
    358  1.1  oster   RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
    359  1.1  oster   RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
    360  1.1  oster   RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
    361  1.1  oster   caddr_t parityBuffer, regionBuffer=NULL;
    362  1.1  oster 
    363  1.1  oster   /* Reintegrate a region (regionID).
    364  1.1  oster      1. acquire region and parity buffers
    365  1.1  oster      2. read log from disk
    366  1.1  oster      3. read parity from disk
    367  1.1  oster      4. apply log to parity
    368  1.1  oster      5. apply core log to parity
    369  1.1  oster      6. write new parity to disk
    370  1.1  oster 
    371  1.1  oster      BLOCKING
    372  1.1  oster     */
    373  1.1  oster 
    374  1.1  oster   if (rf_parityLogDebug)
    375  1.1  oster     printf("[reintegrating region %d]\n", regionID);
    376  1.1  oster 
    377  1.1  oster   /* initiate read of region parity */
    378  1.1  oster   if (rf_parityLogDebug)
    379  1.1  oster     printf("[initiating read of parity for region %d]\n", regionID);
    380  1.1  oster   parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
    381  1.1  oster   prd_mcpair = rf_AllocMCPair();
    382  1.1  oster   RF_LOCK_MUTEX(prd_mcpair->mutex);
    383  1.1  oster   prd_mcpair->flag = RF_FALSE;
    384  1.1  oster   ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
    385  1.1  oster 
    386  1.1  oster   /* if region log nonempty, initiate read */
    387  1.1  oster   if (raidPtr->regionInfo[regionID].diskCount > 0)
    388  1.1  oster     {
    389  1.1  oster       if (rf_parityLogDebug)
    390  1.1  oster 	printf("[initiating read of disk log for region %d]\n", regionID);
    391  1.1  oster       regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
    392  1.1  oster       rrd_mcpair = rf_AllocMCPair();
    393  1.1  oster       RF_LOCK_MUTEX(rrd_mcpair->mutex);
    394  1.1  oster       rrd_mcpair->flag = RF_FALSE;
    395  1.1  oster       ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
    396  1.1  oster     }
    397  1.1  oster 
    398  1.1  oster   /* wait on read of region parity to complete */
    399  1.1  oster   while (!prd_mcpair->flag) {
    400  1.1  oster     RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
    401  1.1  oster   }
    402  1.1  oster   RF_UNLOCK_MUTEX(prd_mcpair->mutex);
    403  1.1  oster   if (prd_dag_h->status != rf_enable)
    404  1.1  oster     {
    405  1.1  oster       RF_ERRORMSG("Unable to read parity from disk\n");
    406  1.1  oster       /* add code to fail the parity disk */
    407  1.1  oster       RF_ASSERT(0);
    408  1.1  oster     }
    409  1.1  oster 
    410  1.1  oster   /* apply core log to parity */
    411  1.1  oster   /*  if (coreLog)
    412  1.1  oster       ApplyLogsToParity(coreLog, parityBuffer); */
    413  1.1  oster 
    414  1.1  oster   if (raidPtr->regionInfo[regionID].diskCount > 0)
    415  1.1  oster     {
    416  1.1  oster       /* wait on read of region log to complete */
    417  1.1  oster       while (!rrd_mcpair->flag)
    418  1.1  oster 	RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
    419  1.1  oster       RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
    420  1.1  oster       if (rrd_dag_h->status != rf_enable)
    421  1.1  oster 	{
    422  1.1  oster 	  RF_ERRORMSG("Unable to read region log from disk\n");
    423  1.1  oster 	  /* add code to fail the log disk */
    424  1.1  oster 	  RF_ASSERT(0);
    425  1.1  oster 	}
    426  1.1  oster       /* apply region log to parity */
    427  1.1  oster       /*      ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
    428  1.1  oster       /* release resources associated with region log */
    429  1.1  oster       /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
    430  1.1  oster       rf_FreePhysDiskAddr(rrd_pda);
    431  1.1  oster       rf_FreeDAG(rrd_dag_h);
    432  1.1  oster       rf_FreeAllocList(rrd_alloclist);
    433  1.1  oster       rf_FreeMCPair(rrd_mcpair);
    434  1.1  oster       ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
    435  1.1  oster     }
    436  1.1  oster 
    437  1.1  oster   /* write reintegrated parity to disk */
    438  1.1  oster   if (rf_parityLogDebug)
    439  1.1  oster     printf("[initiating write of parity for region %d]\n", regionID);
    440  1.1  oster   pwr_mcpair = rf_AllocMCPair();
    441  1.1  oster   RF_LOCK_MUTEX(pwr_mcpair->mutex);
    442  1.1  oster   pwr_mcpair->flag = RF_FALSE;
    443  1.1  oster   WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
    444  1.1  oster   while (!pwr_mcpair->flag)
    445  1.1  oster     RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
    446  1.1  oster   RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
    447  1.1  oster   if (pwr_dag_h->status != rf_enable)
    448  1.1  oster     {
    449  1.1  oster       RF_ERRORMSG("Unable to write parity to disk\n");
    450  1.1  oster       /* add code to fail the parity disk */
    451  1.1  oster       RF_ASSERT(0);
    452  1.1  oster     }
    453  1.1  oster 
    454  1.1  oster   /* release resources associated with read of old parity */
    455  1.1  oster   /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
    456  1.1  oster   rf_FreePhysDiskAddr(prd_pda);
    457  1.1  oster   rf_FreeDAG(prd_dag_h);
    458  1.1  oster   rf_FreeAllocList(prd_alloclist);
    459  1.1  oster   rf_FreeMCPair(prd_mcpair);
    460  1.1  oster 
    461  1.1  oster   /* release resources associated with write of new parity */
    462  1.1  oster   ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
    463  1.1  oster   /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    464  1.1  oster   rf_FreePhysDiskAddr(pwr_pda);
    465  1.1  oster   rf_FreeDAG(pwr_dag_h);
    466  1.1  oster   rf_FreeAllocList(pwr_alloclist);
    467  1.1  oster   rf_FreeMCPair(pwr_mcpair);
    468  1.1  oster 
    469  1.1  oster   if (rf_parityLogDebug)
    470  1.1  oster     printf("[finished reintegrating region %d]\n", regionID);
    471  1.1  oster }
    472  1.1  oster 
    473  1.1  oster 
    474  1.1  oster 
    475  1.1  oster static void ReintegrateLogs(
    476  1.1  oster   RF_Raid_t       *raidPtr,
    477  1.1  oster   RF_ParityLog_t  *logList)
    478  1.1  oster {
    479  1.1  oster   RF_ParityLog_t *log, *freeLogList = NULL;
    480  1.1  oster   RF_ParityLogData_t *logData, *logDataList;
    481  1.1  oster   RF_RegionId_t regionID;
    482  1.1  oster 
    483  1.1  oster   RF_ASSERT(logList);
    484  1.1  oster   while (logList)
    485  1.1  oster     {
    486  1.1  oster       log = logList;
    487  1.1  oster       logList = logList->next;
    488  1.1  oster       log->next = NULL;
    489  1.1  oster       regionID = log->regionID;
    490  1.1  oster       ReintegrateRegion(raidPtr, regionID, log);
    491  1.1  oster       log->numRecords = 0;
    492  1.1  oster 
    493  1.1  oster       /* remove all items which are blocked on reintegration of this region */
    494  1.1  oster       RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    495  1.1  oster       logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
    496  1.1  oster       logDataList = logData;
    497  1.1  oster       while (logData)
    498  1.1  oster 	{
    499  1.1  oster 	  logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
    500  1.1  oster 	  logData = logData->next;
    501  1.1  oster 	}
    502  1.1  oster       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    503  1.1  oster 
    504  1.1  oster       /* process blocked log data and clear reintInProgress flag for this region */
    505  1.1  oster       if (logDataList)
    506  1.1  oster 	rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
    507  1.1  oster       else
    508  1.1  oster 	{
    509  1.1  oster 	  /* Enable flushing for this region.  Holding both locks provides
    510  1.1  oster 	     a synchronization barrier with DumpParityLogToDisk
    511  1.1  oster 	     */
    512  1.1  oster 	  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    513  1.1  oster 	  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    514  1.1  oster 	  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    515  1.1  oster 	  raidPtr->regionInfo[regionID].diskCount = 0;
    516  1.1  oster 	  raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    517  1.1  oster 	  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    518  1.1  oster 	  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
    519  1.1  oster 	  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    520  1.1  oster 	}
    521  1.1  oster       /* if log wasn't used, attach it to the list of logs to be returned */
    522  1.1  oster       if (log)
    523  1.1  oster 	{
    524  1.1  oster 	  log->next = freeLogList;
    525  1.1  oster 	  freeLogList = log;
    526  1.1  oster 	}
    527  1.1  oster     }
    528  1.1  oster   if (freeLogList)
    529  1.1  oster     rf_ReleaseParityLogs(raidPtr, freeLogList);
    530  1.1  oster }
    531  1.1  oster 
    532  1.1  oster int rf_ShutdownLogging(RF_Raid_t *raidPtr)
    533  1.1  oster {
    534  1.1  oster   /* shutdown parity logging
    535  1.1  oster      1) disable parity logging in all regions
    536  1.1  oster      2) reintegrate all regions
    537  1.1  oster      */
    538  1.1  oster 
    539  1.1  oster   RF_SectorCount_t diskCount;
    540  1.1  oster   RF_RegionId_t regionID;
    541  1.1  oster   RF_ParityLog_t *log;
    542  1.1  oster 
    543  1.1  oster   if (rf_parityLogDebug)
    544  1.1  oster     printf("[shutting down parity logging]\n");
    545  1.1  oster   /* Since parity log maps are volatile, we must reintegrate all regions. */
    546  1.1  oster   if (rf_forceParityLogReint) {
    547  1.1  oster     for (regionID = 0; regionID < rf_numParityRegions; regionID++)
    548  1.1  oster       {
    549  1.1  oster 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    550  1.1  oster 	raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
    551  1.1  oster 	log = raidPtr->regionInfo[regionID].coreLog;
    552  1.1  oster 	raidPtr->regionInfo[regionID].coreLog = NULL;
    553  1.1  oster 	diskCount = raidPtr->regionInfo[regionID].diskCount;
    554  1.1  oster 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    555  1.1  oster 	if (diskCount > 0 || log != NULL)
    556  1.1  oster 	  ReintegrateRegion(raidPtr, regionID, log);
    557  1.1  oster 	if (log != NULL)
    558  1.1  oster 	  rf_ReleaseParityLogs(raidPtr, log);
    559  1.1  oster       }
    560  1.1  oster   }
    561  1.1  oster   if (rf_parityLogDebug)
    562  1.1  oster     {
    563  1.1  oster       printf("[parity logging disabled]\n");
    564  1.1  oster       printf("[should be done!]\n");
    565  1.1  oster     }
    566  1.1  oster   return(0);
    567  1.1  oster }
    568  1.1  oster 
    569  1.1  oster int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
    570  1.1  oster {
    571  1.1  oster   RF_ParityLog_t *reintQueue, *flushQueue;
    572  1.1  oster   int workNeeded, done = RF_FALSE;
    573  1.1  oster 
    574  1.1  oster   rf_assign_threadid(); /* don't remove this line */
    575  1.1  oster 
    576  1.1  oster   /* Main program for parity logging disk thread.  This routine waits
    577  1.1  oster      for work to appear in either the flush or reintegration queues
    578  1.1  oster      and is responsible for flushing core logs to the log disk as
    579  1.1  oster      well as reintegrating parity regions.
    580  1.1  oster 
    581  1.1  oster      BLOCKING
    582  1.1  oster      */
    583  1.1  oster 
    584  1.1  oster   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    585  1.1  oster 
    586  1.1  oster   /*
    587  1.1  oster    * Inform our creator that we're running. Don't bother doing the
    588  1.1  oster    * mutex lock/unlock dance- we locked above, and we'll unlock
    589  1.1  oster    * below with nothing to do, yet.
    590  1.1  oster    */
    591  1.1  oster   raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
    592  1.1  oster   RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    593  1.1  oster 
    594  1.1  oster   /* empty the work queues */
    595  1.1  oster   flushQueue = raidPtr->parityLogDiskQueue.flushQueue;  raidPtr->parityLogDiskQueue.flushQueue = NULL;
    596  1.1  oster   reintQueue = raidPtr->parityLogDiskQueue.reintQueue;  raidPtr->parityLogDiskQueue.reintQueue = NULL;
    597  1.1  oster   workNeeded = (flushQueue || reintQueue);
    598  1.1  oster 
    599  1.1  oster   while (!done)
    600  1.1  oster     {
    601  1.1  oster       while (workNeeded)
    602  1.1  oster 	{
    603  1.1  oster 	  /* First, flush all logs in the flush queue, freeing buffers
    604  1.1  oster 	     Second, reintegrate all regions which are reported as full.
    605  1.1  oster 	     Third, append queued log data until blocked.
    606  1.1  oster 
    607  1.1  oster 	     Note: Incoming appends (ParityLogAppend) can block on either
    608  1.1  oster 	       1. empty buffer pool
    609  1.1  oster 	       2. region under reintegration
    610  1.1  oster 	     To preserve a global FIFO ordering of appends, buffers are not
    611  1.1  oster 	     released to the world until those appends blocked on buffers are
    612  1.1  oster 	     removed from the append queue.  Similarly, regions which are
    613  1.1  oster 	     reintegrated are not opened for general use until the append
    614  1.1  oster 	     queue has been emptied.
    615  1.1  oster 	     */
    616  1.1  oster 
    617  1.1  oster 	  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    618  1.1  oster 
    619  1.1  oster 	  /* empty flushQueue, using free'd log buffers to process bufTail */
    620  1.1  oster 	  if (flushQueue)
    621  1.1  oster 	    FlushLogsToDisk(raidPtr, flushQueue);
    622  1.1  oster 
    623  1.1  oster 	  /* empty reintQueue, flushing from reintTail as we go */
    624  1.1  oster 	  if (reintQueue)
    625  1.1  oster 	    ReintegrateLogs(raidPtr, reintQueue);
    626  1.1  oster 
    627  1.1  oster 	  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    628  1.1  oster 	  flushQueue = raidPtr->parityLogDiskQueue.flushQueue;  raidPtr->parityLogDiskQueue.flushQueue = NULL;
    629  1.1  oster 	  reintQueue = raidPtr->parityLogDiskQueue.reintQueue;  raidPtr->parityLogDiskQueue.reintQueue = NULL;
    630  1.1  oster 	  workNeeded = (flushQueue || reintQueue);
    631  1.1  oster 	}
    632  1.1  oster       /* no work is needed at this point */
    633  1.1  oster       if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE)
    634  1.1  oster 	{
    635  1.1  oster 	  /* shutdown parity logging
    636  1.1  oster 	     1. disable parity logging in all regions
    637  1.1  oster 	     2. reintegrate all regions
    638  1.1  oster 	     */
    639  1.1  oster 	  done = RF_TRUE;  /* thread disabled, no work needed */
    640  1.1  oster 	  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    641  1.1  oster 	  rf_ShutdownLogging(raidPtr);
    642  1.1  oster 	}
    643  1.1  oster       if (!done)
    644  1.1  oster 	{
    645  1.1  oster 	  /* thread enabled, no work needed, so sleep */
    646  1.1  oster 	  if (rf_parityLogDebug)
    647  1.1  oster 	    printf("[parity logging disk manager sleeping]\n");
    648  1.1  oster 	  RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
    649  1.1  oster 	  if (rf_parityLogDebug)
    650  1.1  oster 	    printf("[parity logging disk manager just woke up]\n");
    651  1.1  oster 	  flushQueue = raidPtr->parityLogDiskQueue.flushQueue;  raidPtr->parityLogDiskQueue.flushQueue = NULL;
    652  1.1  oster 	  reintQueue = raidPtr->parityLogDiskQueue.reintQueue;  raidPtr->parityLogDiskQueue.reintQueue = NULL;
    653  1.1  oster 	  workNeeded = (flushQueue || reintQueue);
    654  1.1  oster 	}
    655  1.1  oster     }
    656  1.1  oster   /*
    657  1.1  oster    * Announce that we're done.
    658  1.1  oster    */
    659  1.1  oster   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    660  1.1  oster   raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
    661  1.1  oster   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    662  1.1  oster   RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    663  1.1  oster #if defined(__NetBSD__) && defined(_KERNEL)
    664  1.1  oster   /*
    665  1.1  oster    * In the NetBSD kernel, the thread must exit; returning would
    666  1.1  oster    * cause the proc trampoline to attempt to return to userspace.
    667  1.1  oster    */
    668  1.1  oster   kthread_exit(0);	/* does not return */
    669  1.1  oster #else
    670  1.1  oster   return(0);
    671  1.1  oster #endif
    672  1.1  oster }
    673  1.1  oster 
    674  1.1  oster #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
    675