Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylog.c revision 1.13
      1 /*	$NetBSD: rf_paritylog.c,v 1.13 2007/03/04 06:02:38 christos Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: William V. Courtright II
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /* Code for manipulating in-core parity logs
     30  *
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.13 2007/03/04 06:02:38 christos Exp $");
     35 
     36 #include "rf_archs.h"
     37 
     38 #if RF_INCLUDE_PARITYLOGGING > 0
     39 
     40 /*
     41  * Append-only log for recording parity "update" and "overwrite" records
     42  */
     43 
     44 #include <dev/raidframe/raidframevar.h>
     45 
     46 #include "rf_threadstuff.h"
     47 #include "rf_mcpair.h"
     48 #include "rf_raid.h"
     49 #include "rf_dag.h"
     50 #include "rf_dagfuncs.h"
     51 #include "rf_desc.h"
     52 #include "rf_layout.h"
     53 #include "rf_diskqueue.h"
     54 #include "rf_etimer.h"
     55 #include "rf_paritylog.h"
     56 #include "rf_general.h"
     57 #include "rf_map.h"
     58 #include "rf_paritylogging.h"
     59 #include "rf_paritylogDiskMgr.h"
     60 
     61 static RF_CommonLogData_t *
     62 AllocParityLogCommonData(RF_Raid_t * raidPtr)
     63 {
     64 	RF_CommonLogData_t *common = NULL;
     65 
     66 	/* Return a struct for holding common parity log information from the
     67 	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
     68 	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
     69 
     70 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     71 	if (raidPtr->parityLogDiskQueue.freeCommonList) {
     72 		common = raidPtr->parityLogDiskQueue.freeCommonList;
     73 		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
     74 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     75 	} else {
     76 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     77 		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
     78 		rf_mutex_init(&common->mutex);
     79 	}
     80 	common->next = NULL;
     81 	return (common);
     82 }
     83 
     84 static void
     85 FreeParityLogCommonData(RF_CommonLogData_t * common)
     86 {
     87 	RF_Raid_t *raidPtr;
     88 
     89 	/* Insert a single struct for holding parity log information (data)
     90 	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
     91 	 * NON-BLOCKING */
     92 
     93 	raidPtr = common->raidPtr;
     94 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     95 	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
     96 	raidPtr->parityLogDiskQueue.freeCommonList = common;
     97 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     98 }
     99 
    100 static RF_ParityLogData_t *
    101 AllocParityLogData(RF_Raid_t * raidPtr)
    102 {
    103 	RF_ParityLogData_t *data = NULL;
    104 
    105 	/* Return a struct for holding parity log information from the free
    106 	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
    107 	 * call RF_Malloc to create a new structure. NON-BLOCKING */
    108 
    109 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    110 	if (raidPtr->parityLogDiskQueue.freeDataList) {
    111 		data = raidPtr->parityLogDiskQueue.freeDataList;
    112 		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
    113 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    114 	} else {
    115 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    116 		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
    117 	}
    118 	data->next = NULL;
    119 	data->prev = NULL;
    120 	return (data);
    121 }
    122 
    123 
    124 static void
    125 FreeParityLogData(RF_ParityLogData_t * data)
    126 {
    127 	RF_ParityLogData_t *nextItem;
    128 	RF_Raid_t *raidPtr;
    129 
    130 	/* Insert a linked list of structs for holding parity log information
    131 	 * (data) into the free list (parityLogDiskQueue.freeList).
    132 	 * NON-BLOCKING */
    133 
    134 	raidPtr = data->common->raidPtr;
    135 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    136 	while (data) {
    137 		nextItem = data->next;
    138 		data->next = raidPtr->parityLogDiskQueue.freeDataList;
    139 		raidPtr->parityLogDiskQueue.freeDataList = data;
    140 		data = nextItem;
    141 	}
    142 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    143 }
    144 
    145 
    146 static void
    147 EnqueueParityLogData(
    148     RF_ParityLogData_t * data,
    149     RF_ParityLogData_t ** head,
    150     RF_ParityLogData_t ** tail)
    151 {
    152 	RF_Raid_t *raidPtr;
    153 
    154 	/* Insert an in-core parity log (*data) into the head of a disk queue
    155 	 * (*head, *tail). NON-BLOCKING */
    156 
    157 	raidPtr = data->common->raidPtr;
    158 	if (rf_parityLogDebug)
    159 		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    160 	RF_ASSERT(data->prev == NULL);
    161 	RF_ASSERT(data->next == NULL);
    162 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    163 	if (*head) {
    164 		/* insert into head of queue */
    165 		RF_ASSERT((*head)->prev == NULL);
    166 		RF_ASSERT((*tail)->next == NULL);
    167 		data->next = *head;
    168 		(*head)->prev = data;
    169 		*head = data;
    170 	} else {
    171 		/* insert into empty list */
    172 		RF_ASSERT(*head == NULL);
    173 		RF_ASSERT(*tail == NULL);
    174 		*head = data;
    175 		*tail = data;
    176 	}
    177 	RF_ASSERT((*head)->prev == NULL);
    178 	RF_ASSERT((*tail)->next == NULL);
    179 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    180 }
    181 
    182 static RF_ParityLogData_t *
    183 DequeueParityLogData(
    184     RF_Raid_t * raidPtr,
    185     RF_ParityLogData_t ** head,
    186     RF_ParityLogData_t ** tail,
    187     int ignoreLocks)
    188 {
    189 	RF_ParityLogData_t *data;
    190 
    191 	/* Remove and return an in-core parity log from the tail of a disk
    192 	 * queue (*head, *tail). NON-BLOCKING */
    193 
    194 	/* remove from tail, preserving FIFO order */
    195 	if (!ignoreLocks)
    196 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    197 	data = *tail;
    198 	if (data) {
    199 		if (*head == *tail) {
    200 			/* removing last item from queue */
    201 			*head = NULL;
    202 			*tail = NULL;
    203 		} else {
    204 			*tail = (*tail)->prev;
    205 			(*tail)->next = NULL;
    206 			RF_ASSERT((*head)->prev == NULL);
    207 			RF_ASSERT((*tail)->next == NULL);
    208 		}
    209 		data->next = NULL;
    210 		data->prev = NULL;
    211 		if (rf_parityLogDebug)
    212 			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    213 	}
    214 	if (*head) {
    215 		RF_ASSERT((*head)->prev == NULL);
    216 		RF_ASSERT((*tail)->next == NULL);
    217 	}
    218 	if (!ignoreLocks)
    219 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    220 	return (data);
    221 }
    222 
    223 
    224 static void
    225 RequeueParityLogData(
    226     RF_ParityLogData_t * data,
    227     RF_ParityLogData_t ** head,
    228     RF_ParityLogData_t ** tail)
    229 {
    230 	RF_Raid_t *raidPtr;
    231 
    232 	/* Insert an in-core parity log (*data) into the tail of a disk queue
    233 	 * (*head, *tail). NON-BLOCKING */
    234 
    235 	raidPtr = data->common->raidPtr;
    236 	RF_ASSERT(data);
    237 	if (rf_parityLogDebug)
    238 		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    239 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    240 	if (*tail) {
    241 		/* append to tail of list */
    242 		data->prev = *tail;
    243 		data->next = NULL;
    244 		(*tail)->next = data;
    245 		*tail = data;
    246 	} else {
    247 		/* inserting into an empty list */
    248 		*head = data;
    249 		*tail = data;
    250 		(*head)->prev = NULL;
    251 		(*tail)->next = NULL;
    252 	}
    253 	RF_ASSERT((*head)->prev == NULL);
    254 	RF_ASSERT((*tail)->next == NULL);
    255 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    256 }
    257 
    258 RF_ParityLogData_t *
    259 rf_CreateParityLogData(
    260     RF_ParityRecordType_t operation,
    261     RF_PhysDiskAddr_t * pda,
    262     void *bufPtr,
    263     RF_Raid_t * raidPtr,
    264     int (*wakeFunc) (RF_DagNode_t * node, int status),
    265     void *wakeArg,
    266     RF_AccTraceEntry_t * tracerec,
    267     RF_Etimer_t startTime)
    268 {
    269 	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
    270 	RF_CommonLogData_t *common;
    271 	RF_PhysDiskAddr_t *diskAddress;
    272 	int     boundary, offset = 0;
    273 
    274 	/* Return an initialized struct of info to be logged. Build one item
    275 	 * per physical disk address, one item per region.
    276 	 *
    277 	 * NON-BLOCKING */
    278 
    279 	diskAddress = pda;
    280 	common = AllocParityLogCommonData(raidPtr);
    281 	RF_ASSERT(common);
    282 
    283 	common->operation = operation;
    284 	common->bufPtr = bufPtr;
    285 	common->raidPtr = raidPtr;
    286 	common->wakeFunc = wakeFunc;
    287 	common->wakeArg = wakeArg;
    288 	common->tracerec = tracerec;
    289 	common->startTime = startTime;
    290 	common->cnt = 0;
    291 
    292 	if (rf_parityLogDebug)
    293 		printf("[entering CreateParityLogData]\n");
    294 	while (diskAddress) {
    295 		common->cnt++;
    296 		data = AllocParityLogData(raidPtr);
    297 		RF_ASSERT(data);
    298 		data->common = common;
    299 		data->next = NULL;
    300 		data->prev = NULL;
    301 		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
    302 		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
    303 			/* disk address does not cross a region boundary */
    304 			data->diskAddress = *diskAddress;
    305 			data->bufOffset = offset;
    306 			offset = offset + diskAddress->numSector;
    307 			EnqueueParityLogData(data, &resultHead, &resultTail);
    308 			/* adjust disk address */
    309 			diskAddress = diskAddress->next;
    310 		} else {
    311 			/* disk address crosses a region boundary */
    312 			/* find address where region is crossed */
    313 			boundary = 0;
    314 			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
    315 				boundary++;
    316 
    317 			/* enter data before the boundary */
    318 			data->diskAddress = *diskAddress;
    319 			data->diskAddress.numSector = boundary;
    320 			data->bufOffset = offset;
    321 			offset += boundary;
    322 			EnqueueParityLogData(data, &resultHead, &resultTail);
    323 			/* adjust disk address */
    324 			diskAddress->startSector += boundary;
    325 			diskAddress->numSector -= boundary;
    326 		}
    327 	}
    328 	if (rf_parityLogDebug)
    329 		printf("[leaving CreateParityLogData]\n");
    330 	return (resultHead);
    331 }
    332 
    333 
    334 RF_ParityLogData_t *
    335 rf_SearchAndDequeueParityLogData(
    336     RF_Raid_t * raidPtr,
    337     int regionID,
    338     RF_ParityLogData_t ** head,
    339     RF_ParityLogData_t ** tail,
    340     int ignoreLocks)
    341 {
    342 	RF_ParityLogData_t *w;
    343 
    344 	/* Remove and return an in-core parity log from a specified region
    345 	 * (regionID). If a matching log is not found, return NULL.
    346 	 *
    347 	 * NON-BLOCKING. */
    348 
    349 	/* walk backward through a list, looking for an entry with a matching
    350 	 * region ID */
    351 	if (!ignoreLocks)
    352 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    353 	w = (*tail);
    354 	while (w) {
    355 		if (w->regionID == regionID) {
    356 			/* remove an element from the list */
    357 			if (w == *tail) {
    358 				if (*head == *tail) {
    359 					/* removing only element in the list */
    360 					*head = NULL;
    361 					*tail = NULL;
    362 				} else {
    363 					/* removing last item in the list */
    364 					*tail = (*tail)->prev;
    365 					(*tail)->next = NULL;
    366 					RF_ASSERT((*head)->prev == NULL);
    367 					RF_ASSERT((*tail)->next == NULL);
    368 				}
    369 			} else {
    370 				if (w == *head) {
    371 					/* removing first item in the list */
    372 					*head = (*head)->next;
    373 					(*head)->prev = NULL;
    374 					RF_ASSERT((*head)->prev == NULL);
    375 					RF_ASSERT((*tail)->next == NULL);
    376 				} else {
    377 					/* removing an item from the middle of
    378 					 * the list */
    379 					w->prev->next = w->next;
    380 					w->next->prev = w->prev;
    381 					RF_ASSERT((*head)->prev == NULL);
    382 					RF_ASSERT((*tail)->next == NULL);
    383 				}
    384 			}
    385 			w->prev = NULL;
    386 			w->next = NULL;
    387 			if (rf_parityLogDebug)
    388 				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
    389 			return (w);
    390 		} else
    391 			w = w->prev;
    392 	}
    393 	if (!ignoreLocks)
    394 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    395 	return (NULL);
    396 }
    397 
    398 static RF_ParityLogData_t *
    399 DequeueMatchingLogData(
    400     RF_Raid_t * raidPtr,
    401     RF_ParityLogData_t ** head,
    402     RF_ParityLogData_t ** tail)
    403 {
    404 	RF_ParityLogData_t *logDataList, *logData;
    405 	int     regionID;
    406 
    407 	/* Remove and return an in-core parity log from the tail of a disk
    408 	 * queue (*head, *tail).  Then remove all matching (identical
    409 	 * regionIDs) logData and return as a linked list.
    410 	 *
    411 	 * NON-BLOCKING */
    412 
    413 	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
    414 	if (logDataList) {
    415 		regionID = logDataList->regionID;
    416 		logData = logDataList;
    417 		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
    418 		while (logData->next) {
    419 			logData = logData->next;
    420 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
    421 		}
    422 	}
    423 	return (logDataList);
    424 }
    425 
    426 
    427 static RF_ParityLog_t *
    428 AcquireParityLog(
    429     RF_ParityLogData_t * logData,
    430     int finish)
    431 {
    432 	RF_ParityLog_t *log = NULL;
    433 	RF_Raid_t *raidPtr;
    434 
    435 	/* Grab a log buffer from the pool and return it. If no buffers are
    436 	 * available, return NULL. NON-BLOCKING */
    437 	raidPtr = logData->common->raidPtr;
    438 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    439 	if (raidPtr->parityLogPool.parityLogs) {
    440 		log = raidPtr->parityLogPool.parityLogs;
    441 		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
    442 		log->regionID = logData->regionID;
    443 		log->numRecords = 0;
    444 		log->next = NULL;
    445 		raidPtr->logsInUse++;
    446 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    447 	} else {
    448 		/* no logs available, so place ourselves on the queue of work
    449 		 * waiting on log buffers this is done while
    450 		 * parityLogPool.mutex is held, to ensure synchronization with
    451 		 * ReleaseParityLogs. */
    452 		if (rf_parityLogDebug)
    453 			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
    454 		if (finish)
    455 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    456 		else
    457 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    458 	}
    459 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    460 	return (log);
    461 }
    462 
    463 void
    464 rf_ReleaseParityLogs(
    465     RF_Raid_t * raidPtr,
    466     RF_ParityLog_t * firstLog)
    467 {
    468 	RF_ParityLogData_t *logDataList;
    469 	RF_ParityLog_t *log, *lastLog;
    470 	int     cnt;
    471 
    472 	/* Insert a linked list of parity logs (firstLog) to the free list
    473 	 * (parityLogPool.parityLogPool)
    474 	 *
    475 	 * NON-BLOCKING. */
    476 
    477 	RF_ASSERT(firstLog);
    478 
    479 	/* Before returning logs to global free list, service all requests
    480 	 * which are blocked on logs.  Holding mutexes for parityLogPool and
    481 	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
    482 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    483 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    484 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    485 	log = firstLog;
    486 	if (firstLog)
    487 		firstLog = firstLog->next;
    488 	log->numRecords = 0;
    489 	log->next = NULL;
    490 	while (logDataList && log) {
    491 		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    492 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    493 		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
    494 		if (rf_parityLogDebug)
    495 			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
    496 		if (log == NULL) {
    497 			log = firstLog;
    498 			if (firstLog) {
    499 				firstLog = firstLog->next;
    500 				log->numRecords = 0;
    501 				log->next = NULL;
    502 			}
    503 		}
    504 		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    505 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    506 		if (log)
    507 			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    508 	}
    509 	/* return remaining logs to pool */
    510 	if (log) {
    511 		log->next = firstLog;
    512 		firstLog = log;
    513 	}
    514 	if (firstLog) {
    515 		lastLog = firstLog;
    516 		raidPtr->logsInUse--;
    517 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    518 		while (lastLog->next) {
    519 			lastLog = lastLog->next;
    520 			raidPtr->logsInUse--;
    521 			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    522 		}
    523 		lastLog->next = raidPtr->parityLogPool.parityLogs;
    524 		raidPtr->parityLogPool.parityLogs = firstLog;
    525 		cnt = 0;
    526 		log = raidPtr->parityLogPool.parityLogs;
    527 		while (log) {
    528 			cnt++;
    529 			log = log->next;
    530 		}
    531 		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
    532 	}
    533 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    534 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    535 }
    536 
    537 static void
    538 ReintLog(
    539     RF_Raid_t * raidPtr,
    540     int regionID,
    541     RF_ParityLog_t * log)
    542 {
    543 	RF_ASSERT(log);
    544 
    545 	/* Insert an in-core parity log (log) into the disk queue of
    546 	 * reintegration work.  Set the flag (reintInProgress) for the
    547 	 * specified region (regionID) to indicate that reintegration is in
    548 	 * progress for this region. NON-BLOCKING */
    549 
    550 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    551 	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
    552 									 * complete */
    553 
    554 	if (rf_parityLogDebug)
    555 		printf("[requesting reintegration of region %d]\n", log->regionID);
    556 	/* move record to reintegration queue */
    557 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    558 	log->next = raidPtr->parityLogDiskQueue.reintQueue;
    559 	raidPtr->parityLogDiskQueue.reintQueue = log;
    560 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    561 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    562 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    563 }
    564 
    565 static void
    566 FlushLog(
    567     RF_Raid_t * raidPtr,
    568     RF_ParityLog_t * log)
    569 {
    570 	/* insert a core log (log) into a list of logs
    571 	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
    572 	 * NON-BLOCKING */
    573 
    574 	RF_ASSERT(log);
    575 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
    576 	RF_ASSERT(log->next == NULL);
    577 	/* move log to flush queue */
    578 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    579 	log->next = raidPtr->parityLogDiskQueue.flushQueue;
    580 	raidPtr->parityLogDiskQueue.flushQueue = log;
    581 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    582 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    583 }
    584 
    585 static int
    586 DumpParityLogToDisk(
    587     int finish,
    588     RF_ParityLogData_t * logData)
    589 {
    590 	int     i, diskCount, regionID = logData->regionID;
    591 	RF_ParityLog_t *log;
    592 	RF_Raid_t *raidPtr;
    593 
    594 	raidPtr = logData->common->raidPtr;
    595 
    596 	/* Move a core log to disk.  If the log disk is full, initiate
    597 	 * reintegration.
    598 	 *
    599 	 * Return (0) if we can enqueue the dump immediately, otherwise return
    600 	 * (1) to indicate we are blocked on reintegration and control of the
    601 	 * thread should be relinquished.
    602 	 *
    603 	 * Caller must hold regionInfo[regionID].mutex
    604 	 *
    605 	 * NON-BLOCKING */
    606 
    607 	if (rf_parityLogDebug)
    608 		printf("[dumping parity log to disk, region %d]\n", regionID);
    609 	log = raidPtr->regionInfo[regionID].coreLog;
    610 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
    611 	RF_ASSERT(log->next == NULL);
    612 
    613 	/* if reintegration is in progress, must queue work */
    614 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    615 	if (raidPtr->regionInfo[regionID].reintInProgress) {
    616 		/* Can not proceed since this region is currently being
    617 		 * reintegrated. We can not block, so queue remaining work and
    618 		 * return */
    619 		if (rf_parityLogDebug)
    620 			printf("[region %d waiting on reintegration]\n", regionID);
    621 		/* XXX not sure about the use of finish - shouldn't this
    622 		 * always be "Enqueue"? */
    623 		if (finish)
    624 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
    625 		else
    626 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
    627 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    628 		return (1);	/* relenquish control of this thread */
    629 	}
    630 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    631 	raidPtr->regionInfo[regionID].coreLog = NULL;
    632 	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
    633 		/* IMPORTANT!! this loop bound assumes region disk holds an
    634 		 * integral number of core logs */
    635 	{
    636 		/* update disk map for this region */
    637 		diskCount = raidPtr->regionInfo[regionID].diskCount;
    638 		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
    639 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
    640 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
    641 		}
    642 		log->diskOffset = diskCount;
    643 		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
    644 		FlushLog(raidPtr, log);
    645 	} else {
    646 		/* no room for log on disk, send it to disk manager and
    647 		 * request reintegration */
    648 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
    649 		ReintLog(raidPtr, regionID, log);
    650 	}
    651 	if (rf_parityLogDebug)
    652 		printf("[finished dumping parity log to disk, region %d]\n", regionID);
    653 	return (0);
    654 }
    655 
    656 int
    657 rf_ParityLogAppend(
    658     RF_ParityLogData_t * logData,
    659     int finish,
    660     RF_ParityLog_t ** incomingLog,
    661     int clearReintFlag)
    662 {
    663 	int     regionID, logItem, itemDone;
    664 	RF_ParityLogData_t *item;
    665 	int     punt, done = RF_FALSE;
    666 	RF_ParityLog_t *log;
    667 	RF_Raid_t *raidPtr;
    668 	RF_Etimer_t timer;
    669 	int     (*wakeFunc) (RF_DagNode_t * node, int status);
    670 	void   *wakeArg;
    671 
    672 	/* Add parity to the appropriate log, one sector at a time. This
    673 	 * routine is called is called by dag functions ParityLogUpdateFunc
    674 	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
    675 	 *
    676 	 * Parity to be logged is contained in a linked-list (logData).  When
    677 	 * this routine returns, every sector in the list will be in one of
    678 	 * three places: 1) entered into the parity log 2) queued, waiting on
    679 	 * reintegration 3) queued, waiting on a core log
    680 	 *
    681 	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
    682 	 * Later, as conditions which required the block are removed, the work
    683 	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
    684 	 *
    685 	 * NON-BLOCKING */
    686 
    687 	raidPtr = logData->common->raidPtr;
    688 	/* lock the region for the first item in logData */
    689 	RF_ASSERT(logData != NULL);
    690 	regionID = logData->regionID;
    691 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    692 	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
    693 
    694 	if (clearReintFlag) {
    695 		/* Enable flushing for this region.  Holding both locks
    696 		 * provides a synchronization barrier with DumpParityLogToDisk */
    697 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    698 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    699 		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
    700 		raidPtr->regionInfo[regionID].diskCount = 0;
    701 		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    702 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
    703 										 * enabled */
    704 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    705 	}
    706 	/* process each item in logData */
    707 	while (logData) {
    708 		/* remove an item from logData */
    709 		item = logData;
    710 		logData = logData->next;
    711 		item->next = NULL;
    712 		item->prev = NULL;
    713 
    714 		if (rf_parityLogDebug)
    715 			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
    716 
    717 		/* see if we moved to a new region */
    718 		if (regionID != item->regionID) {
    719 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    720 			regionID = item->regionID;
    721 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    722 			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
    723 		}
    724 		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
    725 				 * can happen in one of two ways: 1) no core
    726 				 * log (AcquireParityLog) 2) waiting on
    727 				 * reintegration (DumpParityLogToDisk) If punt
    728 				 * is RF_TRUE, the dataItem was queued, so
    729 				 * skip to next item. */
    730 
    731 		/* process item, one sector at a time, until all sectors
    732 		 * processed or we punt */
    733 		if (item->diskAddress.numSector > 0)
    734 			done = RF_FALSE;
    735 		else
    736 			RF_ASSERT(0);
    737 		while (!punt && !done) {
    738 			/* verify that a core log exists for this region */
    739 			if (!raidPtr->regionInfo[regionID].coreLog) {
    740 				/* Attempt to acquire a parity log. If
    741 				 * acquisition fails, queue remaining work in
    742 				 * data item and move to nextItem. */
    743 				if (incomingLog)
    744 					if (*incomingLog) {
    745 						RF_ASSERT((*incomingLog)->next == NULL);
    746 						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
    747 						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
    748 						*incomingLog = NULL;
    749 					} else
    750 						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    751 				else
    752 					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    753 				/* Note: AcquireParityLog either returns a log
    754 				 * or enqueues currentItem */
    755 			}
    756 			if (!raidPtr->regionInfo[regionID].coreLog)
    757 				punt = RF_TRUE;	/* failed to find a core log */
    758 			else {
    759 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
    760 				/* verify that the log has room for new
    761 				 * entries */
    762 				/* if log is full, dump it to disk and grab a
    763 				 * new log */
    764 				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
    765 					/* log is full, dump it to disk */
    766 					if (DumpParityLogToDisk(finish, item))
    767 						punt = RF_TRUE;	/* dump unsuccessful,
    768 								 * blocked on
    769 								 * reintegration */
    770 					else {
    771 						/* dump was successful */
    772 						if (incomingLog)
    773 							if (*incomingLog) {
    774 								RF_ASSERT((*incomingLog)->next == NULL);
    775 								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
    776 								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
    777 								*incomingLog = NULL;
    778 							} else
    779 								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    780 						else
    781 							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    782 						/* if a core log is not
    783 						 * available, must queue work
    784 						 * and return */
    785 						if (!raidPtr->regionInfo[regionID].coreLog)
    786 							punt = RF_TRUE;	/* blocked on log
    787 									 * availability */
    788 					}
    789 				}
    790 			}
    791 			/* if we didn't punt on this item, attempt to add a
    792 			 * sector to the core log */
    793 			if (!punt) {
    794 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
    795 				/* at this point, we have a core log with
    796 				 * enough room for a sector */
    797 				/* copy a sector into the log */
    798 				log = raidPtr->regionInfo[regionID].coreLog;
    799 				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
    800 				logItem = log->numRecords++;
    801 				log->records[logItem].parityAddr = item->diskAddress;
    802 				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
    803 				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
    804 				log->records[logItem].parityAddr.numSector = 1;
    805 				log->records[logItem].operation = item->common->operation;
    806 				memcpy((char *)log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), ((char *)item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
    807 				item->diskAddress.numSector--;
    808 				item->diskAddress.startSector++;
    809 				if (item->diskAddress.numSector == 0)
    810 					done = RF_TRUE;
    811 			}
    812 		}
    813 
    814 		if (!punt) {
    815 			/* Processed this item completely, decrement count of
    816 			 * items to be processed. */
    817 			RF_ASSERT(item->diskAddress.numSector == 0);
    818 			RF_LOCK_MUTEX(item->common->mutex);
    819 			item->common->cnt--;
    820 			if (item->common->cnt == 0)
    821 				itemDone = RF_TRUE;
    822 			else
    823 				itemDone = RF_FALSE;
    824 			RF_UNLOCK_MUTEX(item->common->mutex);
    825 			if (itemDone) {
    826 				/* Finished processing all log data for this
    827 				 * IO Return structs to free list and invoke
    828 				 * wakeup function. */
    829 				timer = item->common->startTime;	/* grab initial value of
    830 									 * timer */
    831 				RF_ETIMER_STOP(timer);
    832 				RF_ETIMER_EVAL(timer);
    833 				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
    834 				if (rf_parityLogDebug)
    835 					printf("[waking process for region %d]\n", item->regionID);
    836 				wakeFunc = item->common->wakeFunc;
    837 				wakeArg = item->common->wakeArg;
    838 				FreeParityLogCommonData(item->common);
    839 				FreeParityLogData(item);
    840 				(wakeFunc) (wakeArg, 0);
    841 			} else
    842 				FreeParityLogData(item);
    843 		}
    844 	}
    845 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    846 	if (rf_parityLogDebug)
    847 		printf("[exiting ParityLogAppend]\n");
    848 	return (0);
    849 }
    850 
    851 
    852 void
    853 rf_EnableParityLogging(RF_Raid_t * raidPtr)
    854 {
    855 	int     regionID;
    856 
    857 	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
    858 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    859 		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
    860 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    861 	}
    862 	if (rf_parityLogDebug)
    863 		printf("[parity logging enabled]\n");
    864 }
    865 #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
    866