Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylogDiskMgr.c revision 1.1
      1 /*	$NetBSD: rf_paritylogDiskMgr.c,v 1.1 1998/11/13 04:20:31 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: William V. Courtright II
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 /* Code for flushing and reintegration operations related to parity logging.
     29  *
     30  * :
     31  * Log: rf_paritylogDiskMgr.c,v
     32  * Revision 1.25  1996/07/28 20:31:39  jimz
     33  * i386netbsd port
     34  * true/false fixup
     35  *
     36  * Revision 1.24  1996/07/27  23:36:08  jimz
     37  * Solaris port of simulator
     38  *
     39  * Revision 1.23  1996/07/22  19:52:16  jimz
     40  * switched node params to RF_DagParam_t, a union of
     41  * a 64-bit int and a void *, for better portability
     42  * attempted hpux port, but failed partway through for
     43  * lack of a single C compiler capable of compiling all
     44  * source files
     45  *
     46  * Revision 1.22  1996/06/11  10:17:33  jimz
     47  * Put in thread startup/shutdown mechanism for proper synchronization
     48  * with start and end of day routines.
     49  *
     50  * Revision 1.21  1996/06/09  02:36:46  jimz
     51  * lots of little crufty cleanup- fixup whitespace
     52  * issues, comment #ifdefs, improve typing in some
     53  * places (esp size-related)
     54  *
     55  * Revision 1.20  1996/06/07  21:33:04  jimz
     56  * begin using consistent types for sector numbers,
     57  * stripe numbers, row+col numbers, recon unit numbers
     58  *
     59  * Revision 1.19  1996/06/05  18:06:02  jimz
     60  * Major code cleanup. The Great Renaming is now done.
     61  * Better modularity. Better typing. Fixed a bunch of
     62  * synchronization bugs. Made a lot of global stuff
     63  * per-desc or per-array. Removed dead code.
     64  *
     65  * Revision 1.18  1996/06/02  17:31:48  jimz
     66  * Moved a lot of global stuff into array structure, where it belongs.
     67  * Fixed up paritylogging, pss modules in this manner. Some general
     68  * code cleanup. Removed lots of dead code, some dead files.
     69  *
     70  * Revision 1.17  1996/05/31  22:26:54  jimz
     71  * fix a lot of mapping problems, memory allocation problems
     72  * found some weird lock issues, fixed 'em
     73  * more code cleanup
     74  *
     75  * Revision 1.16  1996/05/30  23:22:16  jimz
     76  * bugfixes of serialization, timing problems
     77  * more cleanup
     78  *
     79  * Revision 1.15  1996/05/30  12:59:18  jimz
     80  * make etimer happier, more portable
     81  *
     82  * Revision 1.14  1996/05/30  11:29:41  jimz
     83  * Numerous bug fixes. Stripe lock release code disagreed with the taking code
     84  * about when stripes should be locked (I made it consistent: no parity, no lock)
     85  * There was a lot of extra serialization of I/Os which I've removed- a lot of
     86  * it was to calculate values for the cache code, which is no longer with us.
     87  * More types, function, macro cleanup. Added code to properly quiesce the array
     88  * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
     89  * before. Fixed memory allocation, freeing bugs.
     90  *
     91  * Revision 1.13  1996/05/27  18:56:37  jimz
     92  * more code cleanup
     93  * better typing
     94  * compiles in all 3 environments
     95  *
     96  * Revision 1.12  1996/05/24  22:17:04  jimz
     97  * continue code + namespace cleanup
     98  * typed a bunch of flags
     99  *
    100  * Revision 1.11  1996/05/24  04:28:55  jimz
    101  * release cleanup ckpt
    102  *
    103  * Revision 1.10  1996/05/23  21:46:35  jimz
    104  * checkpoint in code cleanup (release prep)
    105  * lots of types, function names have been fixed
    106  *
    107  * Revision 1.9  1996/05/23  00:33:23  jimz
    108  * code cleanup: move all debug decls to rf_options.c, all extern
    109  * debug decls to rf_options.h, all debug vars preceded by rf_
    110  *
    111  * Revision 1.8  1996/05/18  19:51:34  jimz
    112  * major code cleanup- fix syntax, make some types consistent,
    113  * add prototypes, clean out dead code, et cetera
    114  *
    115  * Revision 1.7  1995/12/12  18:10:06  jimz
    116  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
    117  * fix 80-column brain damage in comments
    118  *
    119  * Revision 1.6  1995/12/06  20:58:27  wvcii
    120  * added prototypes
    121  *
    122  * Revision 1.5  1995/11/30  16:06:05  wvcii
    123  * added copyright info
    124  *
    125  * Revision 1.4  1995/10/09  22:41:10  wvcii
    126  * minor bug fix
    127  *
    128  * Revision 1.3  1995/10/08  20:43:47  wvcii
    129  * lots of random debugging - debugging still incomplete
    130  *
    131  * Revision 1.2  1995/09/07  15:52:19  jimz
    132  * noop compile when INCLUDE_PARITYLOGGING not defined
    133  *
    134  * Revision 1.1  1995/09/06  19:24:44  wvcii
    135  * Initial revision
    136  *
    137  */
    138 
    139 #include "rf_archs.h"
    140 
    141 #if RF_INCLUDE_PARITYLOGGING > 0
    142 
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_mcpair.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagfuncs.h"
#include "rf_desc.h"
#include "rf_layout.h"
#include "rf_diskqueue.h"
#include "rf_paritylog.h"
#include "rf_general.h"
#include "rf_debugMem.h"
#include "rf_threadid.h"
#include "rf_etimer.h"
#include "rf_paritylogging.h"
#include "rf_engine.h"
#include "rf_dagutils.h"
#include "rf_map.h"
#include "rf_parityscan.h"
#include "rf_sys.h"
    162 
    163 #include "rf_paritylogDiskMgr.h"
    164 
    165 static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
    166 
    167 static caddr_t AcquireReintBuffer(pool)
    168   RF_RegionBufferQueue_t  *pool;
    169 {
    170   caddr_t bufPtr = NULL;
    171 
    172   /* Return a region buffer from the free list (pool).
    173      If the free list is empty, WAIT.
    174      BLOCKING */
    175 
    176   RF_LOCK_MUTEX(pool->mutex);
    177   if (pool->availableBuffers > 0) {
    178     bufPtr = pool->buffers[pool->availBuffersIndex];
    179     pool->availableBuffers--;
    180     pool->availBuffersIndex++;
    181     if (pool->availBuffersIndex == pool->totalBuffers)
    182       pool->availBuffersIndex = 0;
    183     RF_UNLOCK_MUTEX(pool->mutex);
    184   }
    185   else {
    186     RF_PANIC(); /* should never happen in currect config, single reint */
    187     RF_WAIT_COND(pool->cond, pool->mutex);
    188   }
    189   return(bufPtr);
    190 }
    191 
    192 static void ReleaseReintBuffer(
    193   RF_RegionBufferQueue_t  *pool,
    194   caddr_t                  bufPtr)
    195 {
    196   /* Insert a region buffer (bufPtr) into the free list (pool).
    197      NON-BLOCKING */
    198 
    199   RF_LOCK_MUTEX(pool->mutex);
    200   pool->availableBuffers++;
    201   pool->buffers[pool->emptyBuffersIndex] = bufPtr;
    202   pool->emptyBuffersIndex++;
    203   if (pool->emptyBuffersIndex == pool->totalBuffers)
    204     pool->emptyBuffersIndex = 0;
    205   RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
    206   RF_UNLOCK_MUTEX(pool->mutex);
    207   RF_SIGNAL_COND(pool->cond);
    208 }
    209 
    210 
    211 
    212 static void ReadRegionLog(
    213   RF_RegionId_t         regionID,
    214   RF_MCPair_t          *rrd_mcpair,
    215   caddr_t               regionBuffer,
    216   RF_Raid_t            *raidPtr,
    217   RF_DagHeader_t      **rrd_dag_h,
    218   RF_AllocListElem_t  **rrd_alloclist,
    219   RF_PhysDiskAddr_t   **rrd_pda)
    220 {
    221   /* Initiate the read a region log from disk.  Once initiated, return
    222      to the calling routine.
    223 
    224      NON-BLOCKING
    225    */
    226 
    227   RF_AccTraceEntry_t tracerec;
    228   RF_DagNode_t *rrd_rdNode;
    229 
    230   /* create DAG to read region log from disk */
    231   rf_MakeAllocList(*rrd_alloclist);
    232   *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    233 			     "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    234 
    235   /* create and initialize PDA for the core log */
    236   /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
    237   *rrd_pda = rf_AllocPDAList(1);
    238   rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
    239   (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
    240 
    241   if ((*rrd_pda)->next) {
    242     (*rrd_pda)->next = NULL;
    243     printf("set rrd_pda->next to NULL\n");
    244   }
    245 
    246   /* initialize DAG parameters */
    247   bzero((char *)&tracerec,sizeof(tracerec));
    248   (*rrd_dag_h)->tracerec = &tracerec;
    249   rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
    250   rrd_rdNode->params[0].p = *rrd_pda;
    251 /*  rrd_rdNode->params[1] = regionBuffer; */
    252   rrd_rdNode->params[2].v = 0;
    253   rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    254 
    255   /* launch region log read dag */
    256   rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    257 		 (void *) rrd_mcpair);
    258 }
    259 
    260 
    261 
    262 static void WriteCoreLog(
    263   RF_ParityLog_t       *log,
    264   RF_MCPair_t          *fwr_mcpair,
    265   RF_Raid_t            *raidPtr,
    266   RF_DagHeader_t      **fwr_dag_h,
    267   RF_AllocListElem_t  **fwr_alloclist,
    268   RF_PhysDiskAddr_t   **fwr_pda)
    269 {
    270   RF_RegionId_t regionID = log->regionID;
    271   RF_AccTraceEntry_t tracerec;
    272   RF_SectorNum_t regionOffset;
    273   RF_DagNode_t *fwr_wrNode;
    274 
    275   /* Initiate the write of a core log to a region log disk.
    276      Once initiated, return to the calling routine.
    277 
    278      NON-BLOCKING
    279    */
    280 
    281   /* create DAG to write a core log to a region log disk */
    282   rf_MakeAllocList(*fwr_alloclist);
    283   *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    284 			     "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    285 
    286   /* create and initialize PDA for the region log */
    287   /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
    288   *fwr_pda = rf_AllocPDAList(1);
    289   regionOffset = log->diskOffset;
    290   rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
    291   (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
    292 
    293   /* initialize DAG parameters */
    294   bzero((char *)&tracerec,sizeof(tracerec));
    295   (*fwr_dag_h)->tracerec = &tracerec;
    296   fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
    297   fwr_wrNode->params[0].p = *fwr_pda;
    298 /*  fwr_wrNode->params[1] = log->bufPtr; */
    299   fwr_wrNode->params[2].v = 0;
    300   fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    301 
    302   /* launch the dag to write the core log to disk */
    303   rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
    304 		 (void *) fwr_mcpair);
    305 }
    306 
    307 
    308 static void ReadRegionParity(
    309   RF_RegionId_t         regionID,
    310   RF_MCPair_t          *prd_mcpair,
    311   caddr_t               parityBuffer,
    312   RF_Raid_t            *raidPtr,
    313   RF_DagHeader_t      **prd_dag_h,
    314   RF_AllocListElem_t  **prd_alloclist,
    315   RF_PhysDiskAddr_t   **prd_pda)
    316 {
    317   /* Initiate the read region parity from disk.
    318      Once initiated, return to the calling routine.
    319 
    320      NON-BLOCKING
    321    */
    322 
    323   RF_AccTraceEntry_t tracerec;
    324   RF_DagNode_t *prd_rdNode;
    325 
    326   /* create DAG to read region parity from disk */
    327   rf_MakeAllocList(*prd_alloclist);
    328   *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    329 			     "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    330 
    331   /* create and initialize PDA for region parity */
    332   /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
    333   *prd_pda = rf_AllocPDAList(1);
    334   rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
    335   if (rf_parityLogDebug)
    336     printf("[reading %d sectors of parity from region %d]\n",
    337 	   (int)(*prd_pda)->numSector, regionID);
    338   if ((*prd_pda)->next) {
    339     (*prd_pda)->next = NULL;
    340     printf("set prd_pda->next to NULL\n");
    341   }
    342 
    343   /* initialize DAG parameters */
    344   bzero((char *)&tracerec,sizeof(tracerec));
    345   (*prd_dag_h)->tracerec = &tracerec;
    346   prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
    347   prd_rdNode->params[0].p = *prd_pda;
    348   prd_rdNode->params[1].p = parityBuffer;
    349   prd_rdNode->params[2].v = 0;
    350   prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    351   if (rf_validateDAGDebug)
    352     rf_ValidateDAG(*prd_dag_h);
    353   /* launch region parity read dag */
    354   rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
    355 		 (void *) prd_mcpair);
    356 }
    357 
    358 static void WriteRegionParity(
    359   RF_RegionId_t         regionID,
    360   RF_MCPair_t          *pwr_mcpair,
    361   caddr_t               parityBuffer,
    362   RF_Raid_t            *raidPtr,
    363   RF_DagHeader_t      **pwr_dag_h,
    364   RF_AllocListElem_t  **pwr_alloclist,
    365   RF_PhysDiskAddr_t   **pwr_pda)
    366 {
    367   /* Initiate the write of region parity to disk.
    368      Once initiated, return to the calling routine.
    369 
    370      NON-BLOCKING
    371    */
    372 
    373   RF_AccTraceEntry_t tracerec;
    374   RF_DagNode_t *pwr_wrNode;
    375 
    376   /* create DAG to write region log from disk */
    377   rf_MakeAllocList(*pwr_alloclist);
    378   *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    379 			     "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
    380 
    381   /* create and initialize PDA for region parity */
    382   /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
    383   *pwr_pda = rf_AllocPDAList(1);
    384   rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
    385 
    386   /* initialize DAG parameters */
    387   bzero((char *)&tracerec,sizeof(tracerec));
    388   (*pwr_dag_h)->tracerec = &tracerec;
    389   pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
    390   pwr_wrNode->params[0].p = *pwr_pda;
    391 /*  pwr_wrNode->params[1] = parityBuffer; */
    392   pwr_wrNode->params[2].v = 0;
    393   pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    394 
    395   /* launch the dag to write region parity to disk */
    396   rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    397 			      (void *) pwr_mcpair);
    398 }
    399 
    400 static void FlushLogsToDisk(
    401   RF_Raid_t       *raidPtr,
    402   RF_ParityLog_t  *logList)
    403 {
    404   /* Flush a linked list of core logs to the log disk.
    405      Logs contain the disk location where they should be
    406      written.  Logs were written in FIFO order and that
    407      order must be preserved.
    408 
    409      Recommended optimizations:
    410        1) allow multiple flushes to occur simultaneously
    411        2) coalesce contiguous flush operations
    412 
    413      BLOCKING
    414      */
    415 
    416   RF_ParityLog_t *log;
    417   RF_RegionId_t regionID;
    418   RF_MCPair_t *fwr_mcpair;
    419   RF_DagHeader_t *fwr_dag_h;
    420   RF_AllocListElem_t *fwr_alloclist;
    421   RF_PhysDiskAddr_t *fwr_pda;
    422 
    423   fwr_mcpair = rf_AllocMCPair();
    424   RF_LOCK_MUTEX(fwr_mcpair->mutex);
    425 
    426   RF_ASSERT(logList);
    427   log = logList;
    428   while (log)
    429     {
    430       regionID = log->regionID;
    431 
    432       /* create and launch a DAG to write the core log */
    433       if (rf_parityLogDebug)
    434 	printf("[initiating write of core log for region %d]\n", regionID);
    435       fwr_mcpair->flag = RF_FALSE;
    436       WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
    437 
    438       /* wait for the DAG to complete */
    439 #ifndef SIMULATE
    440       while (!fwr_mcpair->flag)
    441 	RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
    442 #endif /* !SIMULATE */
    443       if (fwr_dag_h->status != rf_enable)
    444 	{
    445 	  RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
    446 	  RF_ASSERT(0);
    447 	}
    448 
    449       /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    450       rf_FreePhysDiskAddr(fwr_pda);
    451       rf_FreeDAG(fwr_dag_h);
    452       rf_FreeAllocList(fwr_alloclist);
    453 
    454       log = log->next;
    455     }
    456   RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
    457   rf_FreeMCPair(fwr_mcpair);
    458   rf_ReleaseParityLogs(raidPtr, logList);
    459 }
    460 
    461 static void ReintegrateRegion(
    462   RF_Raid_t       *raidPtr,
    463   RF_RegionId_t    regionID,
    464   RF_ParityLog_t  *coreLog)
    465 {
    466   RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair;
    467   RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
    468   RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
    469   RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
    470   caddr_t parityBuffer, regionBuffer=NULL;
    471 
    472   /* Reintegrate a region (regionID).
    473      1. acquire region and parity buffers
    474      2. read log from disk
    475      3. read parity from disk
    476      4. apply log to parity
    477      5. apply core log to parity
    478      6. write new parity to disk
    479 
    480      BLOCKING
    481     */
    482 
    483   if (rf_parityLogDebug)
    484     printf("[reintegrating region %d]\n", regionID);
    485 
    486   /* initiate read of region parity */
    487   if (rf_parityLogDebug)
    488     printf("[initiating read of parity for region %d]\n", regionID);
    489   parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
    490   prd_mcpair = rf_AllocMCPair();
    491   RF_LOCK_MUTEX(prd_mcpair->mutex);
    492   prd_mcpair->flag = RF_FALSE;
    493   ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
    494 
    495   /* if region log nonempty, initiate read */
    496   if (raidPtr->regionInfo[regionID].diskCount > 0)
    497     {
    498       if (rf_parityLogDebug)
    499 	printf("[initiating read of disk log for region %d]\n", regionID);
    500       regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
    501       rrd_mcpair = rf_AllocMCPair();
    502       RF_LOCK_MUTEX(rrd_mcpair->mutex);
    503       rrd_mcpair->flag = RF_FALSE;
    504       ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
    505     }
    506 
    507   /* wait on read of region parity to complete */
    508 #ifndef SIMULATE
    509   while (!prd_mcpair->flag) {
    510     RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
    511   }
    512 #endif /* !SIMULATE */
    513   RF_UNLOCK_MUTEX(prd_mcpair->mutex);
    514   if (prd_dag_h->status != rf_enable)
    515     {
    516       RF_ERRORMSG("Unable to read parity from disk\n");
    517       /* add code to fail the parity disk */
    518       RF_ASSERT(0);
    519     }
    520 
    521   /* apply core log to parity */
    522   /*  if (coreLog)
    523       ApplyLogsToParity(coreLog, parityBuffer); */
    524 
    525   if (raidPtr->regionInfo[regionID].diskCount > 0)
    526     {
    527       /* wait on read of region log to complete */
    528 #ifndef SIMULATE
    529       while (!rrd_mcpair->flag)
    530 	RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
    531 #endif /* !SIMULATE */
    532       RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
    533       if (rrd_dag_h->status != rf_enable)
    534 	{
    535 	  RF_ERRORMSG("Unable to read region log from disk\n");
    536 	  /* add code to fail the log disk */
    537 	  RF_ASSERT(0);
    538 	}
    539       /* apply region log to parity */
    540       /*      ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
    541       /* release resources associated with region log */
    542       /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
    543       rf_FreePhysDiskAddr(rrd_pda);
    544       rf_FreeDAG(rrd_dag_h);
    545       rf_FreeAllocList(rrd_alloclist);
    546       rf_FreeMCPair(rrd_mcpair);
    547       ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
    548     }
    549 
    550   /* write reintegrated parity to disk */
    551   if (rf_parityLogDebug)
    552     printf("[initiating write of parity for region %d]\n", regionID);
    553   pwr_mcpair = rf_AllocMCPair();
    554   RF_LOCK_MUTEX(pwr_mcpair->mutex);
    555   pwr_mcpair->flag = RF_FALSE;
    556   WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
    557 #ifndef SIMULATE
    558   while (!pwr_mcpair->flag)
    559     RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
    560 #endif /* !SIMULATE */
    561   RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
    562   if (pwr_dag_h->status != rf_enable)
    563     {
    564       RF_ERRORMSG("Unable to write parity to disk\n");
    565       /* add code to fail the parity disk */
    566       RF_ASSERT(0);
    567     }
    568 
    569   /* release resources associated with read of old parity */
    570   /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
    571   rf_FreePhysDiskAddr(prd_pda);
    572   rf_FreeDAG(prd_dag_h);
    573   rf_FreeAllocList(prd_alloclist);
    574   rf_FreeMCPair(prd_mcpair);
    575 
    576   /* release resources associated with write of new parity */
    577   ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
    578   /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
    579   rf_FreePhysDiskAddr(pwr_pda);
    580   rf_FreeDAG(pwr_dag_h);
    581   rf_FreeAllocList(pwr_alloclist);
    582   rf_FreeMCPair(pwr_mcpair);
    583 
    584   if (rf_parityLogDebug)
    585     printf("[finished reintegrating region %d]\n", regionID);
    586 }
    587 
    588 
    589 
    590 static void ReintegrateLogs(
    591   RF_Raid_t       *raidPtr,
    592   RF_ParityLog_t  *logList)
    593 {
    594   RF_ParityLog_t *log, *freeLogList = NULL;
    595   RF_ParityLogData_t *logData, *logDataList;
    596   RF_RegionId_t regionID;
    597 
    598   RF_ASSERT(logList);
    599   while (logList)
    600     {
    601       log = logList;
    602       logList = logList->next;
    603       log->next = NULL;
    604       regionID = log->regionID;
    605       ReintegrateRegion(raidPtr, regionID, log);
    606       log->numRecords = 0;
    607 
    608       /* remove all items which are blocked on reintegration of this region */
    609       RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    610       logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
    611       logDataList = logData;
    612       while (logData)
    613 	{
    614 	  logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
    615 	  logData = logData->next;
    616 	}
    617       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    618 
    619       /* process blocked log data and clear reintInProgress flag for this region */
    620       if (logDataList)
    621 	rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
    622       else
    623 	{
    624 	  /* Enable flushing for this region.  Holding both locks provides
    625 	     a synchronization barrier with DumpParityLogToDisk
    626 	     */
    627 	  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    628 	  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    629 	  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    630 	  raidPtr->regionInfo[regionID].diskCount = 0;
    631 	  raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    632 	  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    633 	  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
    634 	  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    635 	}
    636       /* if log wasn't used, attach it to the list of logs to be returned */
    637       if (log)
    638 	{
    639 	  log->next = freeLogList;
    640 	  freeLogList = log;
    641 	}
    642     }
    643   if (freeLogList)
    644     rf_ReleaseParityLogs(raidPtr, freeLogList);
    645 }
    646 
    647 int rf_ShutdownLogging(RF_Raid_t *raidPtr)
    648 {
    649   /* shutdown parity logging
    650      1) disable parity logging in all regions
    651      2) reintegrate all regions
    652      */
    653 
    654   RF_SectorCount_t diskCount;
    655   RF_RegionId_t regionID;
    656   RF_ParityLog_t *log;
    657 
    658   if (rf_parityLogDebug)
    659     printf("[shutting down parity logging]\n");
    660   /* Since parity log maps are volatile, we must reintegrate all regions. */
    661   if (rf_forceParityLogReint) {
    662     for (regionID = 0; regionID < rf_numParityRegions; regionID++)
    663       {
    664 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    665 	raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
    666 	log = raidPtr->regionInfo[regionID].coreLog;
    667 	raidPtr->regionInfo[regionID].coreLog = NULL;
    668 	diskCount = raidPtr->regionInfo[regionID].diskCount;
    669 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    670 	if (diskCount > 0 || log != NULL)
    671 	  ReintegrateRegion(raidPtr, regionID, log);
    672 	if (log != NULL)
    673 	  rf_ReleaseParityLogs(raidPtr, log);
    674       }
    675   }
    676   if (rf_parityLogDebug)
    677     {
    678       printf("[parity logging disabled]\n");
    679       printf("[should be done!]\n");
    680     }
    681   return(0);
    682 }
    683 
    684 int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
    685 {
    686   RF_ParityLog_t *reintQueue, *flushQueue;
    687   int workNeeded, done = RF_FALSE;
    688 
    689   rf_assign_threadid(); /* don't remove this line */
    690 
    691   /* Main program for parity logging disk thread.  This routine waits
    692      for work to appear in either the flush or reintegration queues
    693      and is responsible for flushing core logs to the log disk as
    694      well as reintegrating parity regions.
    695 
    696      BLOCKING
    697      */
    698 
    699   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    700 
    701   /*
    702    * Inform our creator that we're running. Don't bother doing the
    703    * mutex lock/unlock dance- we locked above, and we'll unlock
    704    * below with nothing to do, yet.
    705    */
    706   raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
    707   RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    708 
    709   /* empty the work queues */
    710   flushQueue = raidPtr->parityLogDiskQueue.flushQueue;  raidPtr->parityLogDiskQueue.flushQueue = NULL;
    711   reintQueue = raidPtr->parityLogDiskQueue.reintQueue;  raidPtr->parityLogDiskQueue.reintQueue = NULL;
    712   workNeeded = (flushQueue || reintQueue);
    713 
    714   while (!done)
    715     {
    716       while (workNeeded)
    717 	{
    718 	  /* First, flush all logs in the flush queue, freeing buffers
    719 	     Second, reintegrate all regions which are reported as full.
    720 	     Third, append queued log data until blocked.
    721 
    722 	     Note: Incoming appends (ParityLogAppend) can block on either
    723 	       1. empty buffer pool
    724 	       2. region under reintegration
    725 	     To preserve a global FIFO ordering of appends, buffers are not
    726 	     released to the world until those appends blocked on buffers are
    727 	     removed from the append queue.  Similarly, regions which are
    728 	     reintegrated are not opened for general use until the append
    729 	     queue has been emptied.
    730 	     */
    731 
    732 	  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    733 
    734 	  /* empty flushQueue, using free'd log buffers to process bufTail */
    735 	  if (flushQueue)
    736 	    FlushLogsToDisk(raidPtr, flushQueue);
    737 
    738 	  /* empty reintQueue, flushing from reintTail as we go */
    739 	  if (reintQueue)
    740 	    ReintegrateLogs(raidPtr, reintQueue);
    741 
    742 	  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    743 	  flushQueue = raidPtr->parityLogDiskQueue.flushQueue;  raidPtr->parityLogDiskQueue.flushQueue = NULL;
    744 	  reintQueue = raidPtr->parityLogDiskQueue.reintQueue;  raidPtr->parityLogDiskQueue.reintQueue = NULL;
    745 	  workNeeded = (flushQueue || reintQueue);
    746 	}
    747       /* no work is needed at this point */
    748       if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE)
    749 	{
    750 	  /* shutdown parity logging
    751 	     1. disable parity logging in all regions
    752 	     2. reintegrate all regions
    753 	     */
    754 	  done = RF_TRUE;  /* thread disabled, no work needed */
    755 	  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    756 	  rf_ShutdownLogging(raidPtr);
    757 	}
    758       if (!done)
    759 	{
    760 	  /* thread enabled, no work needed, so sleep */
    761 	  if (rf_parityLogDebug)
    762 	    printf("[parity logging disk manager sleeping]\n");
    763 	  RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
    764 	  if (rf_parityLogDebug)
    765 	    printf("[parity logging disk manager just woke up]\n");
    766 	  flushQueue = raidPtr->parityLogDiskQueue.flushQueue;  raidPtr->parityLogDiskQueue.flushQueue = NULL;
    767 	  reintQueue = raidPtr->parityLogDiskQueue.reintQueue;  raidPtr->parityLogDiskQueue.reintQueue = NULL;
    768 	  workNeeded = (flushQueue || reintQueue);
    769 	}
    770     }
    771   /*
    772    * Announce that we're done.
    773    */
    774   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    775   raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
    776   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    777   RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    778 #if defined(__NetBSD__) && defined(_KERNEL)
    779   /*
    780    * In the NetBSD kernel, the thread must exit; returning would
    781    * cause the proc trampoline to attempt to return to userspace.
    782    */
    783   kthread_exit(0);	/* does not return */
    784 #else
    785   return(0);
    786 #endif
    787 }
    788 
    789 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
    790