Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylog.c revision 1.7
      1 /*	$NetBSD: rf_paritylog.c,v 1.7 2001/11/13 07:11:15 lukem Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: William V. Courtright II
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /* Code for manipulating in-core parity logs
     30  *
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.7 2001/11/13 07:11:15 lukem Exp $");
     35 
     36 #include "rf_archs.h"
     37 
     38 #if RF_INCLUDE_PARITYLOGGING > 0
     39 
     40 /*
     41  * Append-only log for recording parity "update" and "overwrite" records
     42  */
     43 
     44 #include <dev/raidframe/raidframevar.h>
     45 
     46 #include "rf_threadstuff.h"
     47 #include "rf_mcpair.h"
     48 #include "rf_raid.h"
     49 #include "rf_dag.h"
     50 #include "rf_dagfuncs.h"
     51 #include "rf_desc.h"
     52 #include "rf_layout.h"
     53 #include "rf_diskqueue.h"
     54 #include "rf_etimer.h"
     55 #include "rf_paritylog.h"
     56 #include "rf_general.h"
     57 #include "rf_map.h"
     58 #include "rf_paritylogging.h"
     59 #include "rf_paritylogDiskMgr.h"
     60 
     61 static RF_CommonLogData_t *
     62 AllocParityLogCommonData(RF_Raid_t * raidPtr)
     63 {
     64 	RF_CommonLogData_t *common = NULL;
     65 	int     rc;
     66 
     67 	/* Return a struct for holding common parity log information from the
     68 	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
     69 	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
     70 
     71 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     72 	if (raidPtr->parityLogDiskQueue.freeCommonList) {
     73 		common = raidPtr->parityLogDiskQueue.freeCommonList;
     74 		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
     75 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     76 	} else {
     77 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     78 		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
     79 		rc = rf_mutex_init(&common->mutex);
     80 		if (rc) {
     81 			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
     82 			    __LINE__, rc);
     83 			RF_Free(common, sizeof(RF_CommonLogData_t));
     84 			common = NULL;
     85 		}
     86 	}
     87 	common->next = NULL;
     88 	return (common);
     89 }
     90 
     91 static void
     92 FreeParityLogCommonData(RF_CommonLogData_t * common)
     93 {
     94 	RF_Raid_t *raidPtr;
     95 
     96 	/* Insert a single struct for holding parity log information (data)
     97 	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
     98 	 * NON-BLOCKING */
     99 
    100 	raidPtr = common->raidPtr;
    101 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    102 	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
    103 	raidPtr->parityLogDiskQueue.freeCommonList = common;
    104 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    105 }
    106 
    107 static RF_ParityLogData_t *
    108 AllocParityLogData(RF_Raid_t * raidPtr)
    109 {
    110 	RF_ParityLogData_t *data = NULL;
    111 
    112 	/* Return a struct for holding parity log information from the free
    113 	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
    114 	 * call RF_Malloc to create a new structure. NON-BLOCKING */
    115 
    116 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    117 	if (raidPtr->parityLogDiskQueue.freeDataList) {
    118 		data = raidPtr->parityLogDiskQueue.freeDataList;
    119 		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
    120 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    121 	} else {
    122 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    123 		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
    124 	}
    125 	data->next = NULL;
    126 	data->prev = NULL;
    127 	return (data);
    128 }
    129 
    130 
    131 static void
    132 FreeParityLogData(RF_ParityLogData_t * data)
    133 {
    134 	RF_ParityLogData_t *nextItem;
    135 	RF_Raid_t *raidPtr;
    136 
    137 	/* Insert a linked list of structs for holding parity log information
    138 	 * (data) into the free list (parityLogDiskQueue.freeList).
    139 	 * NON-BLOCKING */
    140 
    141 	raidPtr = data->common->raidPtr;
    142 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    143 	while (data) {
    144 		nextItem = data->next;
    145 		data->next = raidPtr->parityLogDiskQueue.freeDataList;
    146 		raidPtr->parityLogDiskQueue.freeDataList = data;
    147 		data = nextItem;
    148 	}
    149 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    150 }
    151 
    152 
    153 static void
    154 EnqueueParityLogData(
    155     RF_ParityLogData_t * data,
    156     RF_ParityLogData_t ** head,
    157     RF_ParityLogData_t ** tail)
    158 {
    159 	RF_Raid_t *raidPtr;
    160 
    161 	/* Insert an in-core parity log (*data) into the head of a disk queue
    162 	 * (*head, *tail). NON-BLOCKING */
    163 
    164 	raidPtr = data->common->raidPtr;
    165 	if (rf_parityLogDebug)
    166 		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    167 	RF_ASSERT(data->prev == NULL);
    168 	RF_ASSERT(data->next == NULL);
    169 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    170 	if (*head) {
    171 		/* insert into head of queue */
    172 		RF_ASSERT((*head)->prev == NULL);
    173 		RF_ASSERT((*tail)->next == NULL);
    174 		data->next = *head;
    175 		(*head)->prev = data;
    176 		*head = data;
    177 	} else {
    178 		/* insert into empty list */
    179 		RF_ASSERT(*head == NULL);
    180 		RF_ASSERT(*tail == NULL);
    181 		*head = data;
    182 		*tail = data;
    183 	}
    184 	RF_ASSERT((*head)->prev == NULL);
    185 	RF_ASSERT((*tail)->next == NULL);
    186 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    187 }
    188 
    189 static RF_ParityLogData_t *
    190 DequeueParityLogData(
    191     RF_Raid_t * raidPtr,
    192     RF_ParityLogData_t ** head,
    193     RF_ParityLogData_t ** tail,
    194     int ignoreLocks)
    195 {
    196 	RF_ParityLogData_t *data;
    197 
    198 	/* Remove and return an in-core parity log from the tail of a disk
    199 	 * queue (*head, *tail). NON-BLOCKING */
    200 
    201 	/* remove from tail, preserving FIFO order */
    202 	if (!ignoreLocks)
    203 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    204 	data = *tail;
    205 	if (data) {
    206 		if (*head == *tail) {
    207 			/* removing last item from queue */
    208 			*head = NULL;
    209 			*tail = NULL;
    210 		} else {
    211 			*tail = (*tail)->prev;
    212 			(*tail)->next = NULL;
    213 			RF_ASSERT((*head)->prev == NULL);
    214 			RF_ASSERT((*tail)->next == NULL);
    215 		}
    216 		data->next = NULL;
    217 		data->prev = NULL;
    218 		if (rf_parityLogDebug)
    219 			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    220 	}
    221 	if (*head) {
    222 		RF_ASSERT((*head)->prev == NULL);
    223 		RF_ASSERT((*tail)->next == NULL);
    224 	}
    225 	if (!ignoreLocks)
    226 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    227 	return (data);
    228 }
    229 
    230 
    231 static void
    232 RequeueParityLogData(
    233     RF_ParityLogData_t * data,
    234     RF_ParityLogData_t ** head,
    235     RF_ParityLogData_t ** tail)
    236 {
    237 	RF_Raid_t *raidPtr;
    238 
    239 	/* Insert an in-core parity log (*data) into the tail of a disk queue
    240 	 * (*head, *tail). NON-BLOCKING */
    241 
    242 	raidPtr = data->common->raidPtr;
    243 	RF_ASSERT(data);
    244 	if (rf_parityLogDebug)
    245 		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    246 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    247 	if (*tail) {
    248 		/* append to tail of list */
    249 		data->prev = *tail;
    250 		data->next = NULL;
    251 		(*tail)->next = data;
    252 		*tail = data;
    253 	} else {
    254 		/* inserting into an empty list */
    255 		*head = data;
    256 		*tail = data;
    257 		(*head)->prev = NULL;
    258 		(*tail)->next = NULL;
    259 	}
    260 	RF_ASSERT((*head)->prev == NULL);
    261 	RF_ASSERT((*tail)->next == NULL);
    262 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    263 }
    264 
    265 RF_ParityLogData_t *
    266 rf_CreateParityLogData(
    267     RF_ParityRecordType_t operation,
    268     RF_PhysDiskAddr_t * pda,
    269     caddr_t bufPtr,
    270     RF_Raid_t * raidPtr,
    271     int (*wakeFunc) (RF_DagNode_t * node, int status),
    272     void *wakeArg,
    273     RF_AccTraceEntry_t * tracerec,
    274     RF_Etimer_t startTime)
    275 {
    276 	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
    277 	RF_CommonLogData_t *common;
    278 	RF_PhysDiskAddr_t *diskAddress;
    279 	int     boundary, offset = 0;
    280 
    281 	/* Return an initialized struct of info to be logged. Build one item
    282 	 * per physical disk address, one item per region.
    283 	 *
    284 	 * NON-BLOCKING */
    285 
    286 	diskAddress = pda;
    287 	common = AllocParityLogCommonData(raidPtr);
    288 	RF_ASSERT(common);
    289 
    290 	common->operation = operation;
    291 	common->bufPtr = bufPtr;
    292 	common->raidPtr = raidPtr;
    293 	common->wakeFunc = wakeFunc;
    294 	common->wakeArg = wakeArg;
    295 	common->tracerec = tracerec;
    296 	common->startTime = startTime;
    297 	common->cnt = 0;
    298 
    299 	if (rf_parityLogDebug)
    300 		printf("[entering CreateParityLogData]\n");
    301 	while (diskAddress) {
    302 		common->cnt++;
    303 		data = AllocParityLogData(raidPtr);
    304 		RF_ASSERT(data);
    305 		data->common = common;
    306 		data->next = NULL;
    307 		data->prev = NULL;
    308 		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
    309 		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
    310 			/* disk address does not cross a region boundary */
    311 			data->diskAddress = *diskAddress;
    312 			data->bufOffset = offset;
    313 			offset = offset + diskAddress->numSector;
    314 			EnqueueParityLogData(data, &resultHead, &resultTail);
    315 			/* adjust disk address */
    316 			diskAddress = diskAddress->next;
    317 		} else {
    318 			/* disk address crosses a region boundary */
    319 			/* find address where region is crossed */
    320 			boundary = 0;
    321 			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
    322 				boundary++;
    323 
    324 			/* enter data before the boundary */
    325 			data->diskAddress = *diskAddress;
    326 			data->diskAddress.numSector = boundary;
    327 			data->bufOffset = offset;
    328 			offset += boundary;
    329 			EnqueueParityLogData(data, &resultHead, &resultTail);
    330 			/* adjust disk address */
    331 			diskAddress->startSector += boundary;
    332 			diskAddress->numSector -= boundary;
    333 		}
    334 	}
    335 	if (rf_parityLogDebug)
    336 		printf("[leaving CreateParityLogData]\n");
    337 	return (resultHead);
    338 }
    339 
    340 
    341 RF_ParityLogData_t *
    342 rf_SearchAndDequeueParityLogData(
    343     RF_Raid_t * raidPtr,
    344     int regionID,
    345     RF_ParityLogData_t ** head,
    346     RF_ParityLogData_t ** tail,
    347     int ignoreLocks)
    348 {
    349 	RF_ParityLogData_t *w;
    350 
    351 	/* Remove and return an in-core parity log from a specified region
    352 	 * (regionID). If a matching log is not found, return NULL.
    353 	 *
    354 	 * NON-BLOCKING. */
    355 
    356 	/* walk backward through a list, looking for an entry with a matching
    357 	 * region ID */
    358 	if (!ignoreLocks)
    359 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    360 	w = (*tail);
    361 	while (w) {
    362 		if (w->regionID == regionID) {
    363 			/* remove an element from the list */
    364 			if (w == *tail) {
    365 				if (*head == *tail) {
    366 					/* removing only element in the list */
    367 					*head = NULL;
    368 					*tail = NULL;
    369 				} else {
    370 					/* removing last item in the list */
    371 					*tail = (*tail)->prev;
    372 					(*tail)->next = NULL;
    373 					RF_ASSERT((*head)->prev == NULL);
    374 					RF_ASSERT((*tail)->next == NULL);
    375 				}
    376 			} else {
    377 				if (w == *head) {
    378 					/* removing first item in the list */
    379 					*head = (*head)->next;
    380 					(*head)->prev = NULL;
    381 					RF_ASSERT((*head)->prev == NULL);
    382 					RF_ASSERT((*tail)->next == NULL);
    383 				} else {
    384 					/* removing an item from the middle of
    385 					 * the list */
    386 					w->prev->next = w->next;
    387 					w->next->prev = w->prev;
    388 					RF_ASSERT((*head)->prev == NULL);
    389 					RF_ASSERT((*tail)->next == NULL);
    390 				}
    391 			}
    392 			w->prev = NULL;
    393 			w->next = NULL;
    394 			if (rf_parityLogDebug)
    395 				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
    396 			return (w);
    397 		} else
    398 			w = w->prev;
    399 	}
    400 	if (!ignoreLocks)
    401 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    402 	return (NULL);
    403 }
    404 
    405 static RF_ParityLogData_t *
    406 DequeueMatchingLogData(
    407     RF_Raid_t * raidPtr,
    408     RF_ParityLogData_t ** head,
    409     RF_ParityLogData_t ** tail)
    410 {
    411 	RF_ParityLogData_t *logDataList, *logData;
    412 	int     regionID;
    413 
    414 	/* Remove and return an in-core parity log from the tail of a disk
    415 	 * queue (*head, *tail).  Then remove all matching (identical
    416 	 * regionIDs) logData and return as a linked list.
    417 	 *
    418 	 * NON-BLOCKING */
    419 
    420 	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
    421 	if (logDataList) {
    422 		regionID = logDataList->regionID;
    423 		logData = logDataList;
    424 		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
    425 		while (logData->next) {
    426 			logData = logData->next;
    427 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
    428 		}
    429 	}
    430 	return (logDataList);
    431 }
    432 
    433 
    434 static RF_ParityLog_t *
    435 AcquireParityLog(
    436     RF_ParityLogData_t * logData,
    437     int finish)
    438 {
    439 	RF_ParityLog_t *log = NULL;
    440 	RF_Raid_t *raidPtr;
    441 
    442 	/* Grab a log buffer from the pool and return it. If no buffers are
    443 	 * available, return NULL. NON-BLOCKING */
    444 	raidPtr = logData->common->raidPtr;
    445 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    446 	if (raidPtr->parityLogPool.parityLogs) {
    447 		log = raidPtr->parityLogPool.parityLogs;
    448 		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
    449 		log->regionID = logData->regionID;
    450 		log->numRecords = 0;
    451 		log->next = NULL;
    452 		raidPtr->logsInUse++;
    453 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    454 	} else {
    455 		/* no logs available, so place ourselves on the queue of work
    456 		 * waiting on log buffers this is done while
    457 		 * parityLogPool.mutex is held, to ensure synchronization with
    458 		 * ReleaseParityLogs. */
    459 		if (rf_parityLogDebug)
    460 			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
    461 		if (finish)
    462 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    463 		else
    464 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    465 	}
    466 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    467 	return (log);
    468 }
    469 
    470 void
    471 rf_ReleaseParityLogs(
    472     RF_Raid_t * raidPtr,
    473     RF_ParityLog_t * firstLog)
    474 {
    475 	RF_ParityLogData_t *logDataList;
    476 	RF_ParityLog_t *log, *lastLog;
    477 	int     cnt;
    478 
    479 	/* Insert a linked list of parity logs (firstLog) to the free list
    480 	 * (parityLogPool.parityLogPool)
    481 	 *
    482 	 * NON-BLOCKING. */
    483 
    484 	RF_ASSERT(firstLog);
    485 
    486 	/* Before returning logs to global free list, service all requests
    487 	 * which are blocked on logs.  Holding mutexes for parityLogPool and
    488 	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
    489 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    490 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    491 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    492 	log = firstLog;
    493 	if (firstLog)
    494 		firstLog = firstLog->next;
    495 	log->numRecords = 0;
    496 	log->next = NULL;
    497 	while (logDataList && log) {
    498 		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    499 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    500 		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
    501 		if (rf_parityLogDebug)
    502 			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
    503 		if (log == NULL) {
    504 			log = firstLog;
    505 			if (firstLog) {
    506 				firstLog = firstLog->next;
    507 				log->numRecords = 0;
    508 				log->next = NULL;
    509 			}
    510 		}
    511 		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    512 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    513 		if (log)
    514 			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    515 	}
    516 	/* return remaining logs to pool */
    517 	if (log) {
    518 		log->next = firstLog;
    519 		firstLog = log;
    520 	}
    521 	if (firstLog) {
    522 		lastLog = firstLog;
    523 		raidPtr->logsInUse--;
    524 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    525 		while (lastLog->next) {
    526 			lastLog = lastLog->next;
    527 			raidPtr->logsInUse--;
    528 			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    529 		}
    530 		lastLog->next = raidPtr->parityLogPool.parityLogs;
    531 		raidPtr->parityLogPool.parityLogs = firstLog;
    532 		cnt = 0;
    533 		log = raidPtr->parityLogPool.parityLogs;
    534 		while (log) {
    535 			cnt++;
    536 			log = log->next;
    537 		}
    538 		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
    539 	}
    540 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    541 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    542 }
    543 
    544 static void
    545 ReintLog(
    546     RF_Raid_t * raidPtr,
    547     int regionID,
    548     RF_ParityLog_t * log)
    549 {
    550 	RF_ASSERT(log);
    551 
    552 	/* Insert an in-core parity log (log) into the disk queue of
    553 	 * reintegration work.  Set the flag (reintInProgress) for the
    554 	 * specified region (regionID) to indicate that reintegration is in
    555 	 * progress for this region. NON-BLOCKING */
    556 
    557 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    558 	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
    559 									 * complete */
    560 
    561 	if (rf_parityLogDebug)
    562 		printf("[requesting reintegration of region %d]\n", log->regionID);
    563 	/* move record to reintegration queue */
    564 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    565 	log->next = raidPtr->parityLogDiskQueue.reintQueue;
    566 	raidPtr->parityLogDiskQueue.reintQueue = log;
    567 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    568 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    569 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    570 }
    571 
    572 static void
    573 FlushLog(
    574     RF_Raid_t * raidPtr,
    575     RF_ParityLog_t * log)
    576 {
    577 	/* insert a core log (log) into a list of logs
    578 	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
    579 	 * NON-BLOCKING */
    580 
    581 	RF_ASSERT(log);
    582 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
    583 	RF_ASSERT(log->next == NULL);
    584 	/* move log to flush queue */
    585 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    586 	log->next = raidPtr->parityLogDiskQueue.flushQueue;
    587 	raidPtr->parityLogDiskQueue.flushQueue = log;
    588 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    589 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    590 }
    591 
    592 static int
    593 DumpParityLogToDisk(
    594     int finish,
    595     RF_ParityLogData_t * logData)
    596 {
    597 	int     i, diskCount, regionID = logData->regionID;
    598 	RF_ParityLog_t *log;
    599 	RF_Raid_t *raidPtr;
    600 
    601 	raidPtr = logData->common->raidPtr;
    602 
    603 	/* Move a core log to disk.  If the log disk is full, initiate
    604 	 * reintegration.
    605 	 *
    606 	 * Return (0) if we can enqueue the dump immediately, otherwise return
    607 	 * (1) to indicate we are blocked on reintegration and control of the
    608 	 * thread should be relinquished.
    609 	 *
    610 	 * Caller must hold regionInfo[regionID].mutex
    611 	 *
    612 	 * NON-BLOCKING */
    613 
    614 	if (rf_parityLogDebug)
    615 		printf("[dumping parity log to disk, region %d]\n", regionID);
    616 	log = raidPtr->regionInfo[regionID].coreLog;
    617 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
    618 	RF_ASSERT(log->next == NULL);
    619 
    620 	/* if reintegration is in progress, must queue work */
    621 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    622 	if (raidPtr->regionInfo[regionID].reintInProgress) {
    623 		/* Can not proceed since this region is currently being
    624 		 * reintegrated. We can not block, so queue remaining work and
    625 		 * return */
    626 		if (rf_parityLogDebug)
    627 			printf("[region %d waiting on reintegration]\n", regionID);
    628 		/* XXX not sure about the use of finish - shouldn't this
    629 		 * always be "Enqueue"? */
    630 		if (finish)
    631 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
    632 		else
    633 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
    634 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    635 		return (1);	/* relenquish control of this thread */
    636 	}
    637 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    638 	raidPtr->regionInfo[regionID].coreLog = NULL;
    639 	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
    640 		/* IMPORTANT!! this loop bound assumes region disk holds an
    641 		 * integral number of core logs */
    642 	{
    643 		/* update disk map for this region */
    644 		diskCount = raidPtr->regionInfo[regionID].diskCount;
    645 		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
    646 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
    647 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
    648 		}
    649 		log->diskOffset = diskCount;
    650 		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
    651 		FlushLog(raidPtr, log);
    652 	} else {
    653 		/* no room for log on disk, send it to disk manager and
    654 		 * request reintegration */
    655 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
    656 		ReintLog(raidPtr, regionID, log);
    657 	}
    658 	if (rf_parityLogDebug)
    659 		printf("[finished dumping parity log to disk, region %d]\n", regionID);
    660 	return (0);
    661 }
    662 
    663 int
    664 rf_ParityLogAppend(
    665     RF_ParityLogData_t * logData,
    666     int finish,
    667     RF_ParityLog_t ** incomingLog,
    668     int clearReintFlag)
    669 {
    670 	int     regionID, logItem, itemDone;
    671 	RF_ParityLogData_t *item;
    672 	int     punt, done = RF_FALSE;
    673 	RF_ParityLog_t *log;
    674 	RF_Raid_t *raidPtr;
    675 	RF_Etimer_t timer;
    676 	int     (*wakeFunc) (RF_DagNode_t * node, int status);
    677 	void   *wakeArg;
    678 
    679 	/* Add parity to the appropriate log, one sector at a time. This
    680 	 * routine is called is called by dag functions ParityLogUpdateFunc
    681 	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
    682 	 *
    683 	 * Parity to be logged is contained in a linked-list (logData).  When
    684 	 * this routine returns, every sector in the list will be in one of
    685 	 * three places: 1) entered into the parity log 2) queued, waiting on
    686 	 * reintegration 3) queued, waiting on a core log
    687 	 *
    688 	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
    689 	 * Later, as conditions which required the block are removed, the work
    690 	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
    691 	 *
    692 	 * NON-BLOCKING */
    693 
    694 	raidPtr = logData->common->raidPtr;
    695 	/* lock the region for the first item in logData */
    696 	RF_ASSERT(logData != NULL);
    697 	regionID = logData->regionID;
    698 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    699 	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
    700 
    701 	if (clearReintFlag) {
    702 		/* Enable flushing for this region.  Holding both locks
    703 		 * provides a synchronization barrier with DumpParityLogToDisk */
    704 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    705 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    706 		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
    707 		raidPtr->regionInfo[regionID].diskCount = 0;
    708 		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    709 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
    710 										 * enabled */
    711 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    712 	}
    713 	/* process each item in logData */
    714 	while (logData) {
    715 		/* remove an item from logData */
    716 		item = logData;
    717 		logData = logData->next;
    718 		item->next = NULL;
    719 		item->prev = NULL;
    720 
    721 		if (rf_parityLogDebug)
    722 			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
    723 
    724 		/* see if we moved to a new region */
    725 		if (regionID != item->regionID) {
    726 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    727 			regionID = item->regionID;
    728 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    729 			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
    730 		}
    731 		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
    732 				 * can happen in one of two ways: 1) no core
    733 				 * log (AcquireParityLog) 2) waiting on
    734 				 * reintegration (DumpParityLogToDisk) If punt
    735 				 * is RF_TRUE, the dataItem was queued, so
    736 				 * skip to next item. */
    737 
    738 		/* process item, one sector at a time, until all sectors
    739 		 * processed or we punt */
    740 		if (item->diskAddress.numSector > 0)
    741 			done = RF_FALSE;
    742 		else
    743 			RF_ASSERT(0);
    744 		while (!punt && !done) {
    745 			/* verify that a core log exists for this region */
    746 			if (!raidPtr->regionInfo[regionID].coreLog) {
    747 				/* Attempt to acquire a parity log. If
    748 				 * acquisition fails, queue remaining work in
    749 				 * data item and move to nextItem. */
    750 				if (incomingLog)
    751 					if (*incomingLog) {
    752 						RF_ASSERT((*incomingLog)->next == NULL);
    753 						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
    754 						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
    755 						*incomingLog = NULL;
    756 					} else
    757 						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    758 				else
    759 					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    760 				/* Note: AcquireParityLog either returns a log
    761 				 * or enqueues currentItem */
    762 			}
    763 			if (!raidPtr->regionInfo[regionID].coreLog)
    764 				punt = RF_TRUE;	/* failed to find a core log */
    765 			else {
    766 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
    767 				/* verify that the log has room for new
    768 				 * entries */
    769 				/* if log is full, dump it to disk and grab a
    770 				 * new log */
    771 				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
    772 					/* log is full, dump it to disk */
    773 					if (DumpParityLogToDisk(finish, item))
    774 						punt = RF_TRUE;	/* dump unsuccessful,
    775 								 * blocked on
    776 								 * reintegration */
    777 					else {
    778 						/* dump was successful */
    779 						if (incomingLog)
    780 							if (*incomingLog) {
    781 								RF_ASSERT((*incomingLog)->next == NULL);
    782 								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
    783 								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
    784 								*incomingLog = NULL;
    785 							} else
    786 								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    787 						else
    788 							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    789 						/* if a core log is not
    790 						 * available, must queue work
    791 						 * and return */
    792 						if (!raidPtr->regionInfo[regionID].coreLog)
    793 							punt = RF_TRUE;	/* blocked on log
    794 									 * availability */
    795 					}
    796 				}
    797 			}
    798 			/* if we didn't punt on this item, attempt to add a
    799 			 * sector to the core log */
    800 			if (!punt) {
    801 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
    802 				/* at this point, we have a core log with
    803 				 * enough room for a sector */
    804 				/* copy a sector into the log */
    805 				log = raidPtr->regionInfo[regionID].coreLog;
    806 				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
    807 				logItem = log->numRecords++;
    808 				log->records[logItem].parityAddr = item->diskAddress;
    809 				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
    810 				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
    811 				log->records[logItem].parityAddr.numSector = 1;
    812 				log->records[logItem].operation = item->common->operation;
    813 				bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
    814 				item->diskAddress.numSector--;
    815 				item->diskAddress.startSector++;
    816 				if (item->diskAddress.numSector == 0)
    817 					done = RF_TRUE;
    818 			}
    819 		}
    820 
    821 		if (!punt) {
    822 			/* Processed this item completely, decrement count of
    823 			 * items to be processed. */
    824 			RF_ASSERT(item->diskAddress.numSector == 0);
    825 			RF_LOCK_MUTEX(item->common->mutex);
    826 			item->common->cnt--;
    827 			if (item->common->cnt == 0)
    828 				itemDone = RF_TRUE;
    829 			else
    830 				itemDone = RF_FALSE;
    831 			RF_UNLOCK_MUTEX(item->common->mutex);
    832 			if (itemDone) {
    833 				/* Finished processing all log data for this
    834 				 * IO Return structs to free list and invoke
    835 				 * wakeup function. */
    836 				timer = item->common->startTime;	/* grab initial value of
    837 									 * timer */
    838 				RF_ETIMER_STOP(timer);
    839 				RF_ETIMER_EVAL(timer);
    840 				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
    841 				if (rf_parityLogDebug)
    842 					printf("[waking process for region %d]\n", item->regionID);
    843 				wakeFunc = item->common->wakeFunc;
    844 				wakeArg = item->common->wakeArg;
    845 				FreeParityLogCommonData(item->common);
    846 				FreeParityLogData(item);
    847 				(wakeFunc) (wakeArg, 0);
    848 			} else
    849 				FreeParityLogData(item);
    850 		}
    851 	}
    852 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    853 	if (rf_parityLogDebug)
    854 		printf("[exiting ParityLogAppend]\n");
    855 	return (0);
    856 }
    857 
    858 
    859 void
    860 rf_EnableParityLogging(RF_Raid_t * raidPtr)
    861 {
    862 	int     regionID;
    863 
    864 	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
    865 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    866 		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
    867 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    868 	}
    869 	if (rf_parityLogDebug)
    870 		printf("[parity logging enabled]\n");
    871 }
    872 #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
    873