Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylog.c revision 1.2
      1 /*	$NetBSD: rf_paritylog.c,v 1.2 1999/01/26 02:33:59 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: William V. Courtright II
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /* Code for manipulating in-core parity logs
     30  *
     31  */
     32 
     33 #include "rf_archs.h"
     34 
     35 #if RF_INCLUDE_PARITYLOGGING > 0
     36 
     37 /*
     38  * Append-only log for recording parity "update" and "overwrite" records
     39  */
     40 
     41 #include "rf_types.h"
     42 #include "rf_threadstuff.h"
     43 #include "rf_mcpair.h"
     44 #include "rf_raid.h"
     45 #include "rf_dag.h"
     46 #include "rf_dagfuncs.h"
     47 #include "rf_desc.h"
     48 #include "rf_layout.h"
     49 #include "rf_diskqueue.h"
     50 #include "rf_etimer.h"
     51 #include "rf_paritylog.h"
     52 #include "rf_general.h"
     53 #include "rf_threadid.h"
     54 #include "rf_map.h"
     55 #include "rf_paritylogging.h"
     56 #include "rf_paritylogDiskMgr.h"
     57 #include "rf_sys.h"
     58 
     59 static RF_CommonLogData_t *AllocParityLogCommonData(RF_Raid_t *raidPtr)
     60 {
     61   RF_CommonLogData_t *common = NULL;
     62   int rc;
     63 
     64   /* Return a struct for holding common parity log information from the free
     65      list (rf_parityLogDiskQueue.freeCommonList).  If the free list is empty, call
     66      RF_Malloc to create a new structure.
     67      NON-BLOCKING */
     68 
     69   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     70   if (raidPtr->parityLogDiskQueue.freeCommonList)
     71     {
     72       common = raidPtr->parityLogDiskQueue.freeCommonList;
     73       raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
     74       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     75     }
     76   else
     77     {
     78       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     79       RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
     80       rc = rf_mutex_init(&common->mutex);
     81       if (rc) {
     82         RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
     83           __LINE__, rc);
     84         RF_Free(common, sizeof(RF_CommonLogData_t));
     85         common = NULL;
     86       }
     87     }
     88   common->next = NULL;
     89   return(common);
     90 }
     91 
     92 static void FreeParityLogCommonData(RF_CommonLogData_t *common)
     93 {
     94   RF_Raid_t *raidPtr;
     95 
     96   /* Insert a single struct for holding parity log information
     97      (data) into the free list (rf_parityLogDiskQueue.freeCommonList).
     98      NON-BLOCKING */
     99 
    100   raidPtr = common->raidPtr;
    101   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    102   common->next = raidPtr->parityLogDiskQueue.freeCommonList;
    103   raidPtr->parityLogDiskQueue.freeCommonList = common;
    104   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    105 }
    106 
    107 static RF_ParityLogData_t *AllocParityLogData(RF_Raid_t *raidPtr)
    108 {
    109   RF_ParityLogData_t *data = NULL;
    110 
    111   /* Return a struct for holding parity log information from the free
    112      list (rf_parityLogDiskQueue.freeList).  If the free list is empty, call
    113      RF_Malloc to create a new structure.
    114      NON-BLOCKING */
    115 
    116   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    117   if (raidPtr->parityLogDiskQueue.freeDataList)
    118     {
    119       data = raidPtr->parityLogDiskQueue.freeDataList;
    120       raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
    121       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    122     }
    123   else
    124     {
    125       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    126       RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
    127     }
    128   data->next = NULL;
    129   data->prev = NULL;
    130   return(data);
    131 }
    132 
    133 
    134 static void FreeParityLogData(RF_ParityLogData_t *data)
    135 {
    136   RF_ParityLogData_t *nextItem;
    137   RF_Raid_t *raidPtr;
    138 
    139   /* Insert a linked list of structs for holding parity log
    140      information (data) into the free list (parityLogDiskQueue.freeList).
    141      NON-BLOCKING */
    142 
    143   raidPtr = data->common->raidPtr;
    144   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    145   while (data)
    146     {
    147       nextItem = data->next;
    148       data->next = raidPtr->parityLogDiskQueue.freeDataList;
    149       raidPtr->parityLogDiskQueue.freeDataList = data;
    150       data = nextItem;
    151     }
    152   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    153 }
    154 
    155 
    156 static void EnqueueParityLogData(
    157   RF_ParityLogData_t   *data,
    158   RF_ParityLogData_t  **head,
    159   RF_ParityLogData_t  **tail)
    160 {
    161   RF_Raid_t *raidPtr;
    162 
    163   /* Insert an in-core parity log (*data) into the head of
    164      a disk queue (*head, *tail).
    165      NON-BLOCKING */
    166 
    167   raidPtr = data->common->raidPtr;
    168   if (rf_parityLogDebug)
    169     printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
    170   RF_ASSERT(data->prev == NULL);
    171   RF_ASSERT(data->next == NULL);
    172   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    173   if (*head)
    174     {
    175       /* insert into head of queue */
    176       RF_ASSERT((*head)->prev == NULL);
    177       RF_ASSERT((*tail)->next == NULL);
    178       data->next = *head;
    179       (*head)->prev = data;
    180       *head = data;
    181     }
    182   else
    183     {
    184       /* insert into empty list */
    185       RF_ASSERT(*head == NULL);
    186       RF_ASSERT(*tail == NULL);
    187       *head = data;
    188       *tail = data;
    189     }
    190   RF_ASSERT((*head)->prev == NULL);
    191   RF_ASSERT((*tail)->next == NULL);
    192   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    193 }
    194 
    195 static RF_ParityLogData_t *DequeueParityLogData(
    196   RF_Raid_t            *raidPtr,
    197   RF_ParityLogData_t  **head,
    198   RF_ParityLogData_t  **tail,
    199   int                   ignoreLocks)
    200 {
    201   RF_ParityLogData_t *data;
    202 
    203   /* Remove and return an in-core parity log from the tail of
    204      a disk queue (*head, *tail).
    205      NON-BLOCKING */
    206 
    207   /* remove from tail, preserving FIFO order */
    208   if (!ignoreLocks)
    209     RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    210   data = *tail;
    211   if (data)
    212     {
    213       if (*head == *tail)
    214 	{
    215 	  /* removing last item from queue */
    216 	  *head = NULL;
    217 	  *tail = NULL;
    218 	}
    219       else
    220 	{
    221 	  *tail = (*tail)->prev;
    222 	  (*tail)->next = NULL;
    223 	  RF_ASSERT((*head)->prev == NULL);
    224 	  RF_ASSERT((*tail)->next == NULL);
    225 	}
    226       data->next = NULL;
    227       data->prev = NULL;
    228       if (rf_parityLogDebug)
    229 	printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
    230     }
    231   if (*head)
    232     {
    233       RF_ASSERT((*head)->prev == NULL);
    234       RF_ASSERT((*tail)->next == NULL);
    235     }
    236   if (!ignoreLocks)
    237     RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    238   return(data);
    239 }
    240 
    241 
    242 static void RequeueParityLogData(
    243   RF_ParityLogData_t   *data,
    244   RF_ParityLogData_t  **head,
    245   RF_ParityLogData_t  **tail)
    246 {
    247   RF_Raid_t *raidPtr;
    248 
    249   /* Insert an in-core parity log (*data) into the tail of
    250      a disk queue (*head, *tail).
    251      NON-BLOCKING */
    252 
    253   raidPtr = data->common->raidPtr;
    254   RF_ASSERT(data);
    255   if (rf_parityLogDebug)
    256     printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    257   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    258   if (*tail)
    259     {
    260       /* append to tail of list */
    261       data->prev = *tail;
    262       data->next = NULL;
    263       (*tail)->next = data;
    264       *tail = data;
    265     }
    266   else
    267     {
    268       /* inserting into an empty list */
    269       *head = data;
    270       *tail = data;
    271       (*head)->prev = NULL;
    272       (*tail)->next = NULL;
    273     }
    274   RF_ASSERT((*head)->prev == NULL);
    275   RF_ASSERT((*tail)->next == NULL);
    276   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    277 }
    278 
    279 RF_ParityLogData_t *rf_CreateParityLogData(
    280   RF_ParityRecordType_t    operation,
    281   RF_PhysDiskAddr_t       *pda,
    282   caddr_t                  bufPtr,
    283   RF_Raid_t               *raidPtr,
    284   int                    (*wakeFunc)(RF_DagNode_t *node, int status),
    285   void                    *wakeArg,
    286   RF_AccTraceEntry_t      *tracerec,
    287   RF_Etimer_t              startTime)
    288 {
    289   RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
    290   RF_CommonLogData_t *common;
    291   RF_PhysDiskAddr_t *diskAddress;
    292   int boundary, offset = 0;
    293 
    294   /* Return an initialized struct of info to be logged.
    295      Build one item per physical disk address, one item per region.
    296 
    297      NON-BLOCKING */
    298 
    299   diskAddress = pda;
    300   common = AllocParityLogCommonData(raidPtr);
    301   RF_ASSERT(common);
    302 
    303   common->operation = operation;
    304   common->bufPtr = bufPtr;
    305   common->raidPtr = raidPtr;
    306   common->wakeFunc = wakeFunc;
    307   common->wakeArg = wakeArg;
    308   common->tracerec = tracerec;
    309   common->startTime = startTime;
    310   common->cnt = 0;
    311 
    312   if (rf_parityLogDebug)
    313     printf("[entering CreateParityLogData]\n");
    314   while (diskAddress)
    315     {
    316       common->cnt++;
    317       data = AllocParityLogData(raidPtr);
    318       RF_ASSERT(data);
    319       data->common = common;
    320       data->next = NULL;
    321       data->prev = NULL;
    322       data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
    323       if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1))
    324 	{
    325 	  /* disk address does not cross a region boundary */
    326 	  data->diskAddress = *diskAddress;
    327 	  data->bufOffset = offset;
    328 	  offset = offset + diskAddress->numSector;
    329 	  EnqueueParityLogData(data, &resultHead, &resultTail);
    330 	  /* adjust disk address */
    331 	  diskAddress = diskAddress->next;
    332 	}
    333       else
    334 	{
    335 	  /* disk address crosses a region boundary */
    336 	  /* find address where region is crossed */
    337 	  boundary = 0;
    338 	  while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
    339 	    boundary++;
    340 
    341 	  /* enter data before the boundary */
    342 	  data->diskAddress = *diskAddress;
    343 	  data->diskAddress.numSector = boundary;
    344 	  data->bufOffset = offset;
    345 	  offset += boundary;
    346 	  EnqueueParityLogData(data, &resultHead, &resultTail);
    347 	  /* adjust disk address */
    348 	  diskAddress->startSector += boundary;
    349 	  diskAddress->numSector -= boundary;
    350 	}
    351     }
    352   if (rf_parityLogDebug)
    353     printf("[leaving CreateParityLogData]\n");
    354   return(resultHead);
    355 }
    356 
    357 
    358 RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(
    359   RF_Raid_t            *raidPtr,
    360   int                   regionID,
    361   RF_ParityLogData_t  **head,
    362   RF_ParityLogData_t  **tail,
    363   int                   ignoreLocks)
    364 {
    365   RF_ParityLogData_t *w;
    366 
    367   /* Remove and return an in-core parity log from a specified region (regionID).
    368      If a matching log is not found, return NULL.
    369 
    370      NON-BLOCKING.
    371      */
    372 
    373   /* walk backward through a list, looking for an entry with a matching region ID */
    374   if (!ignoreLocks)
    375     RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    376   w = (*tail);
    377   while (w)
    378     {
    379       if (w->regionID == regionID)
    380 	{
    381 	  /* remove an element from the list */
    382 	  if (w == *tail)
    383 	    {
    384 	      if (*head == *tail)
    385 		{
    386 		  /* removing only element in the list */
    387 		  *head = NULL;
    388 		  *tail = NULL;
    389 		}
    390 	      else
    391 		{
    392 		  /* removing last item in the list */
    393 		  *tail = (*tail)->prev;
    394 		  (*tail)->next = NULL;
    395 		  RF_ASSERT((*head)->prev == NULL);
    396 		  RF_ASSERT((*tail)->next == NULL);
    397 		}
    398 	    }
    399 	  else
    400 	    {
    401 	      if (w == *head)
    402 		{
    403 		  /* removing first item in the list */
    404 		  *head = (*head)->next;
    405 		  (*head)->prev = NULL;
    406 		  RF_ASSERT((*head)->prev == NULL);
    407 		  RF_ASSERT((*tail)->next == NULL);
    408 		}
    409 	      else
    410 		{
    411 		  /* removing an item from the middle of the list */
    412 		  w->prev->next = w->next;
    413 		  w->next->prev = w->prev;
    414 		  RF_ASSERT((*head)->prev == NULL);
    415 		  RF_ASSERT((*tail)->next == NULL);
    416 		}
    417 	    }
    418 	  w->prev = NULL;
    419 	  w->next = NULL;
    420 	  if (rf_parityLogDebug)
    421 	    printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",w->regionID,(int)w->diskAddress.raidAddress,(int) w->diskAddress.numSector);
    422 	  return(w);
    423 	}
    424       else
    425 	w = w->prev;
    426     }
    427   if (!ignoreLocks)
    428     RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    429   return(NULL);
    430 }
    431 
    432 static RF_ParityLogData_t *DequeueMatchingLogData(
    433   RF_Raid_t            *raidPtr,
    434   RF_ParityLogData_t  **head,
    435   RF_ParityLogData_t  **tail)
    436 {
    437   RF_ParityLogData_t *logDataList, *logData;
    438   int regionID;
    439 
    440   /* Remove and return an in-core parity log from the tail of
    441      a disk queue (*head, *tail).  Then remove all matching
    442      (identical regionIDs) logData and return as a linked list.
    443 
    444      NON-BLOCKING
    445      */
    446 
    447   logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
    448   if (logDataList)
    449     {
    450       regionID = logDataList->regionID;
    451       logData = logDataList;
    452       logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
    453       while (logData->next)
    454 	{
    455 	  logData = logData->next;
    456 	  logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
    457 	}
    458     }
    459   return(logDataList);
    460 }
    461 
    462 
    463 static RF_ParityLog_t *AcquireParityLog(
    464   RF_ParityLogData_t  *logData,
    465   int                  finish)
    466 {
    467   RF_ParityLog_t *log = NULL;
    468   RF_Raid_t *raidPtr;
    469 
    470   /* Grab a log buffer from the pool and return it.
    471      If no buffers are available, return NULL.
    472      NON-BLOCKING
    473      */
    474   raidPtr = logData->common->raidPtr;
    475   RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    476   if (raidPtr->parityLogPool.parityLogs)
    477     {
    478       log = raidPtr->parityLogPool.parityLogs;
    479       raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
    480       log->regionID = logData->regionID;
    481       log->numRecords = 0;
    482       log->next = NULL;
    483       raidPtr->logsInUse++;
    484       RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    485     }
    486   else
    487     {
    488       /* no logs available, so place ourselves on the queue of work waiting on log buffers
    489 	 this is done while parityLogPool.mutex is held, to ensure synchronization
    490 	 with ReleaseParityLogs.
    491 	 */
    492       if (rf_parityLogDebug)
    493 	printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
    494       if (finish)
    495 	RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    496       else
    497 	EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    498     }
    499   RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    500   return(log);
    501 }
    502 
    503 void rf_ReleaseParityLogs(
    504   RF_Raid_t       *raidPtr,
    505   RF_ParityLog_t  *firstLog)
    506 {
    507   RF_ParityLogData_t *logDataList;
    508   RF_ParityLog_t *log, *lastLog;
    509   int cnt;
    510 
    511   /* Insert a linked list of parity logs (firstLog) to
    512      the free list (parityLogPool.parityLogPool)
    513 
    514      NON-BLOCKING.
    515      */
    516 
    517   RF_ASSERT(firstLog);
    518 
    519   /* Before returning logs to global free list, service all
    520      requests which are blocked on logs.  Holding mutexes for parityLogPool and parityLogDiskQueue
    521      forces synchronization with AcquireParityLog().
    522      */
    523   RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    524   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    525   logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    526   log = firstLog;
    527   if (firstLog)
    528     firstLog = firstLog->next;
    529   log->numRecords = 0;
    530   log->next = NULL;
    531   while (logDataList && log)
    532     {
    533       RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    534       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    535       rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
    536       if (rf_parityLogDebug)
    537 	printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
    538       if (log == NULL)
    539 	{
    540 	  log = firstLog;
    541 	  if (firstLog)
    542 	    {
    543 	      firstLog = firstLog->next;
    544 	      log->numRecords = 0;
    545 	      log->next = NULL;
    546 	    }
    547 	}
    548       RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    549       RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    550       if (log)
    551 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    552     }
    553   /* return remaining logs to pool */
    554   if (log)
    555     {
    556       log->next = firstLog;
    557       firstLog = log;
    558     }
    559   if (firstLog)
    560     {
    561       lastLog = firstLog;
    562       raidPtr->logsInUse--;
    563       RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    564       while (lastLog->next)
    565 	{
    566 	  lastLog = lastLog->next;
    567 	  raidPtr->logsInUse--;
    568 	  RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    569 	}
    570       lastLog->next = raidPtr->parityLogPool.parityLogs;
    571       raidPtr->parityLogPool.parityLogs = firstLog;
    572       cnt = 0;
    573       log = raidPtr->parityLogPool.parityLogs;
    574       while (log)
    575 	{
    576 	  cnt++;
    577 	  log = log->next;
    578 	}
    579       RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
    580     }
    581   RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    582   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    583 }
    584 
    585 static void ReintLog(
    586   RF_Raid_t       *raidPtr,
    587   int              regionID,
    588   RF_ParityLog_t  *log)
    589 {
    590   RF_ASSERT(log);
    591 
    592   /* Insert an in-core parity log (log) into the disk queue of reintegration
    593      work.  Set the flag (reintInProgress) for the specified region (regionID)
    594      to indicate that reintegration is in progress for this region.
    595      NON-BLOCKING
    596      */
    597 
    598   RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    599   raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;  /* cleared when reint complete */
    600 
    601   if (rf_parityLogDebug)
    602     printf("[requesting reintegration of region %d]\n", log->regionID);
    603   /* move record to reintegration queue */
    604   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    605   log->next = raidPtr->parityLogDiskQueue.reintQueue;
    606   raidPtr->parityLogDiskQueue.reintQueue = log;
    607   RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    608   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    609   RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    610 }
    611 
    612 static void FlushLog(
    613   RF_Raid_t       *raidPtr,
    614   RF_ParityLog_t  *log)
    615 {
    616   /* insert a core log (log) into a list of logs (parityLogDiskQueue.flushQueue)
    617      waiting to be written to disk.
    618      NON-BLOCKING
    619      */
    620 
    621   RF_ASSERT(log);
    622   RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
    623   RF_ASSERT(log->next == NULL);
    624   /* move log to flush queue */
    625   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    626   log->next = raidPtr->parityLogDiskQueue.flushQueue;
    627   raidPtr->parityLogDiskQueue.flushQueue = log;
    628   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    629   RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    630 }
    631 
    632 static int DumpParityLogToDisk(
    633   int                  finish,
    634   RF_ParityLogData_t  *logData)
    635 {
    636   int i, diskCount, regionID = logData->regionID;
    637   RF_ParityLog_t *log;
    638   RF_Raid_t *raidPtr;
    639 
    640   raidPtr = logData->common->raidPtr;
    641 
    642   /* Move a core log to disk.  If the log disk is full, initiate
    643      reintegration.
    644 
    645      Return (0) if we can enqueue the dump immediately, otherwise
    646      return (1) to indicate we are blocked on reintegration and
    647      control of the thread should be relinquished.
    648 
    649      Caller must hold regionInfo[regionID].mutex
    650 
    651      NON-BLOCKING
    652      */
    653 
    654   if (rf_parityLogDebug)
    655     printf("[dumping parity log to disk, region %d]\n", regionID);
    656   log = raidPtr->regionInfo[regionID].coreLog;
    657   RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
    658   RF_ASSERT(log->next == NULL);
    659 
    660   /* if reintegration is in progress, must queue work */
    661   RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    662   if (raidPtr->regionInfo[regionID].reintInProgress)
    663     {
    664       /* Can not proceed since this region is currently being reintegrated.
    665 	 We can not block, so queue remaining work and return */
    666       if (rf_parityLogDebug)
    667 	printf("[region %d waiting on reintegration]\n",regionID);
    668       /* XXX not sure about the use of finish - shouldn't this always be "Enqueue"? */
    669       if (finish)
    670 	RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
    671       else
    672 	EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
    673       RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    674       return(1);  /* relenquish control of this thread */
    675     }
    676   RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    677   raidPtr->regionInfo[regionID].coreLog = NULL;
    678   if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
    679     /* IMPORTANT!! this loop bound assumes region disk holds an integral number of core logs */
    680     {
    681       /* update disk map for this region */
    682       diskCount = raidPtr->regionInfo[regionID].diskCount;
    683       for (i = 0; i < raidPtr->numSectorsPerLog; i++)
    684 	{
    685 	  raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
    686 	  raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
    687 	}
    688       log->diskOffset = diskCount;
    689       raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
    690       FlushLog(raidPtr, log);
    691     }
    692   else
    693     {
    694       /* no room for log on disk, send it to disk manager and request reintegration */
    695       RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
    696       ReintLog(raidPtr, regionID, log);
    697     }
    698   if (rf_parityLogDebug)
    699     printf("[finished dumping parity log to disk, region %d]\n", regionID);
    700   return(0);
    701 }
    702 
    703 int rf_ParityLogAppend(
    704   RF_ParityLogData_t   *logData,
    705   int                   finish,
    706   RF_ParityLog_t      **incomingLog,
    707   int                   clearReintFlag)
    708 {
    709   int regionID, logItem, itemDone;
    710   RF_ParityLogData_t *item;
    711   int punt, done = RF_FALSE;
    712   RF_ParityLog_t *log;
    713   RF_Raid_t *raidPtr;
    714   RF_Etimer_t timer;
    715   int (*wakeFunc)(RF_DagNode_t *node, int status);
    716   void *wakeArg;
    717 
    718   /* Add parity to the appropriate log, one sector at a time.
    719      This routine is called by dag functions ParityLogUpdateFunc
    720      and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
    721 
    722      Parity to be logged is contained in a linked-list (logData).  When
    723      this routine returns, every sector in the list will be in one of
    724      three places:
    725        1) entered into the parity log
    726        2) queued, waiting on reintegration
    727        3) queued, waiting on a core log
    728 
    729      Blocked work is passed to the ParityLoggingDiskManager for completion.
    730      Later, as conditions which required the block are removed, the work
    731      reenters this routine with the "finish" parameter set to "RF_TRUE."
    732 
    733      NON-BLOCKING
    734      */
    735 
    736   raidPtr = logData->common->raidPtr;
    737   /* lock the region for the first item in logData */
    738   RF_ASSERT(logData != NULL);
    739   regionID = logData->regionID;
    740   RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    741   RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
    742 
    743   if (clearReintFlag)
    744     {
    745       /* Enable flushing for this region.  Holding both locks provides
    746 	 a synchronization barrier with DumpParityLogToDisk
    747 	 */
    748       RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    749       RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    750       RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
    751       raidPtr->regionInfo[regionID].diskCount = 0;
    752       raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    753       RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
    754       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    755     }
    756 
    757   /* process each item in logData */
    758   while (logData)
    759     {
    760       /* remove an item from logData */
    761       item = logData;
    762       logData = logData->next;
    763       item->next = NULL;
    764       item->prev = NULL;
    765 
    766       if (rf_parityLogDebug)
    767 	printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n",item->regionID,(int)item->diskAddress.raidAddress, (int)item->diskAddress.numSector);
    768 
    769       /* see if we moved to a new region */
    770       if (regionID != item->regionID)
    771 	{
    772 	  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    773 	  regionID = item->regionID;
    774 	  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    775 	  RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
    776 	}
    777 
    778       punt = RF_FALSE;  /* Set to RF_TRUE if work is blocked.  This can happen in one of two ways:
    779 		          1) no core log (AcquireParityLog)
    780 			  2) waiting on reintegration (DumpParityLogToDisk)
    781 			If punt is RF_TRUE, the dataItem was queued, so skip to next item.
    782 			*/
    783 
    784       /* process item, one sector at a time, until all sectors processed or we punt */
    785       if (item->diskAddress.numSector > 0)
    786 	done = RF_FALSE;
    787       else
    788 	RF_ASSERT(0);
    789       while (!punt && !done)
    790 	{
    791 	  /* verify that a core log exists for this region */
    792 	  if (!raidPtr->regionInfo[regionID].coreLog)
    793 	    {
    794 	      /* Attempt to acquire a parity log.
    795 		 If acquisition fails, queue remaining work in data item and move to nextItem.
    796 		 */
    797 	      if (incomingLog)
    798 		if (*incomingLog)
    799 		  {
    800 		    RF_ASSERT((*incomingLog)->next == NULL);
    801 		    raidPtr->regionInfo[regionID].coreLog = *incomingLog;
    802 		    raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
    803 		    *incomingLog = NULL;
    804 		  }
    805 		else
    806 		  raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    807 	      else
    808 		raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    809 	      /* Note: AcquireParityLog either returns a log or enqueues currentItem */
    810 	    }
    811 	  if (!raidPtr->regionInfo[regionID].coreLog)
    812 	    punt = RF_TRUE; /* failed to find a core log */
    813 	  else
    814 	    {
    815 	      RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
    816 	      /* verify that the log has room for new entries */
    817 	      /* if log is full, dump it to disk and grab a new log */
    818 	      if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog)
    819 		{
    820 		  /* log is full, dump it to disk */
    821 		  if (DumpParityLogToDisk(finish, item))
    822 		    punt = RF_TRUE; /* dump unsuccessful, blocked on reintegration */
    823 		  else
    824 		    {
    825 		      /* dump was successful */
    826 		      if (incomingLog)
    827 			if (*incomingLog)
    828 			  {
    829 			    RF_ASSERT((*incomingLog)->next == NULL);
    830 			    raidPtr->regionInfo[regionID].coreLog = *incomingLog;
    831 			    raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
    832 			    *incomingLog = NULL;
    833 			  }
    834 			else
    835 			  raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    836 		      else
    837 			raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    838 		      /* if a core log is not available, must queue work and return */
    839 		      if (!raidPtr->regionInfo[regionID].coreLog)
    840 			punt = RF_TRUE; /* blocked on log availability */
    841 		    }
    842 		}
    843 	    }
    844 	  /* if we didn't punt on this item, attempt to add a sector to the core log */
    845 	  if (!punt)
    846 	    {
    847 	      RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
    848 	      /* at this point, we have a core log with enough room for a sector */
    849 	      /* copy a sector into the log */
    850 	      log = raidPtr->regionInfo[regionID].coreLog;
    851 	      RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
    852 	      logItem = log->numRecords++;
    853 	      log->records[logItem].parityAddr = item->diskAddress;
    854 	      RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
    855 	      RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
    856 	      log->records[logItem].parityAddr.numSector = 1;
    857 	      log->records[logItem].operation = item->common->operation;
    858 	      bcopy((item->common->bufPtr + (item->bufOffset++ * (1<<item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1<<item->common->raidPtr->logBytesPerSector)), (1<<item->common->raidPtr->logBytesPerSector));
    859 	      item->diskAddress.numSector--;
    860 	      item->diskAddress.startSector++;
    861 	      if (item->diskAddress.numSector == 0)
    862 		done = RF_TRUE;
    863 	    }
    864 	}
    865 
    866       if (!punt)
    867 	{
    868 	  /* Processed this item completely, decrement count of items
    869 	     to be processed.
    870 	     */
    871 	  RF_ASSERT(item->diskAddress.numSector == 0);
    872 	  RF_LOCK_MUTEX(item->common->mutex);
    873 	  item->common->cnt--;
    874 	  if (item->common->cnt == 0)
    875 	    itemDone = RF_TRUE;
    876 	  else
    877 	    itemDone = RF_FALSE;
    878 	  RF_UNLOCK_MUTEX(item->common->mutex);
    879 	  if (itemDone)
    880 	    {
    881 	      /* Finished processing all log data for this IO
    882 		 Return structs to free list and invoke wakeup function.
    883 		 */
    884 	      timer = item->common->startTime;  /* grab initial value of timer */
    885 	      RF_ETIMER_STOP(timer);
    886 	      RF_ETIMER_EVAL(timer);
    887 	      item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
    888 	      if (rf_parityLogDebug)
    889 		printf("[waking process for region %d]\n", item->regionID);
    890 	      wakeFunc = item->common->wakeFunc;
    891 	      wakeArg = item->common->wakeArg;
    892 	      FreeParityLogCommonData(item->common);
    893 	      FreeParityLogData(item);
    894 	      (wakeFunc)(wakeArg, 0);
    895 	    }
    896 	  else
    897 	    FreeParityLogData(item);
    898 	}
    899     }
    900   RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    901   if (rf_parityLogDebug)
    902     printf("[exiting ParityLogAppend]\n");
    903   return(0);
    904 }
    905 
    906 
    907 void rf_EnableParityLogging(RF_Raid_t *raidPtr)
    908 {
    909   int regionID;
    910 
    911   for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
    912     RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    913     raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
    914     RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    915   }
    916   if (rf_parityLogDebug)
    917     printf("[parity logging enabled]\n");
    918 }
    919 
    920 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
    921