Home | History | Annotate | Line # | Download | only in raidframe
rf_paritylog.c revision 1.6
      1 /*	$NetBSD: rf_paritylog.c,v 1.6 2001/10/04 15:58:54 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: William V. Courtright II
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /* Code for manipulating in-core parity logs
     30  *
     31  */
     32 
     33 #include "rf_archs.h"
     34 
     35 #if RF_INCLUDE_PARITYLOGGING > 0
     36 
     37 /*
     38  * Append-only log for recording parity "update" and "overwrite" records
     39  */
     40 
     41 #include <dev/raidframe/raidframevar.h>
     42 
     43 #include "rf_threadstuff.h"
     44 #include "rf_mcpair.h"
     45 #include "rf_raid.h"
     46 #include "rf_dag.h"
     47 #include "rf_dagfuncs.h"
     48 #include "rf_desc.h"
     49 #include "rf_layout.h"
     50 #include "rf_diskqueue.h"
     51 #include "rf_etimer.h"
     52 #include "rf_paritylog.h"
     53 #include "rf_general.h"
     54 #include "rf_map.h"
     55 #include "rf_paritylogging.h"
     56 #include "rf_paritylogDiskMgr.h"
     57 
     58 static RF_CommonLogData_t *
     59 AllocParityLogCommonData(RF_Raid_t * raidPtr)
     60 {
     61 	RF_CommonLogData_t *common = NULL;
     62 	int     rc;
     63 
     64 	/* Return a struct for holding common parity log information from the
     65 	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
     66 	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
     67 
     68 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     69 	if (raidPtr->parityLogDiskQueue.freeCommonList) {
     70 		common = raidPtr->parityLogDiskQueue.freeCommonList;
     71 		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
     72 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     73 	} else {
     74 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     75 		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
     76 		rc = rf_mutex_init(&common->mutex);
     77 		if (rc) {
     78 			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
     79 			    __LINE__, rc);
     80 			RF_Free(common, sizeof(RF_CommonLogData_t));
     81 			common = NULL;
     82 		}
     83 	}
     84 	common->next = NULL;
     85 	return (common);
     86 }
     87 
     88 static void
     89 FreeParityLogCommonData(RF_CommonLogData_t * common)
     90 {
     91 	RF_Raid_t *raidPtr;
     92 
     93 	/* Insert a single struct for holding parity log information (data)
     94 	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
     95 	 * NON-BLOCKING */
     96 
     97 	raidPtr = common->raidPtr;
     98 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
     99 	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
    100 	raidPtr->parityLogDiskQueue.freeCommonList = common;
    101 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    102 }
    103 
    104 static RF_ParityLogData_t *
    105 AllocParityLogData(RF_Raid_t * raidPtr)
    106 {
    107 	RF_ParityLogData_t *data = NULL;
    108 
    109 	/* Return a struct for holding parity log information from the free
    110 	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
    111 	 * call RF_Malloc to create a new structure. NON-BLOCKING */
    112 
    113 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    114 	if (raidPtr->parityLogDiskQueue.freeDataList) {
    115 		data = raidPtr->parityLogDiskQueue.freeDataList;
    116 		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
    117 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    118 	} else {
    119 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    120 		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
    121 	}
    122 	data->next = NULL;
    123 	data->prev = NULL;
    124 	return (data);
    125 }
    126 
    127 
    128 static void
    129 FreeParityLogData(RF_ParityLogData_t * data)
    130 {
    131 	RF_ParityLogData_t *nextItem;
    132 	RF_Raid_t *raidPtr;
    133 
    134 	/* Insert a linked list of structs for holding parity log information
    135 	 * (data) into the free list (parityLogDiskQueue.freeList).
    136 	 * NON-BLOCKING */
    137 
    138 	raidPtr = data->common->raidPtr;
    139 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    140 	while (data) {
    141 		nextItem = data->next;
    142 		data->next = raidPtr->parityLogDiskQueue.freeDataList;
    143 		raidPtr->parityLogDiskQueue.freeDataList = data;
    144 		data = nextItem;
    145 	}
    146 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    147 }
    148 
    149 
    150 static void
    151 EnqueueParityLogData(
    152     RF_ParityLogData_t * data,
    153     RF_ParityLogData_t ** head,
    154     RF_ParityLogData_t ** tail)
    155 {
    156 	RF_Raid_t *raidPtr;
    157 
    158 	/* Insert an in-core parity log (*data) into the head of a disk queue
    159 	 * (*head, *tail). NON-BLOCKING */
    160 
    161 	raidPtr = data->common->raidPtr;
    162 	if (rf_parityLogDebug)
    163 		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    164 	RF_ASSERT(data->prev == NULL);
    165 	RF_ASSERT(data->next == NULL);
    166 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    167 	if (*head) {
    168 		/* insert into head of queue */
    169 		RF_ASSERT((*head)->prev == NULL);
    170 		RF_ASSERT((*tail)->next == NULL);
    171 		data->next = *head;
    172 		(*head)->prev = data;
    173 		*head = data;
    174 	} else {
    175 		/* insert into empty list */
    176 		RF_ASSERT(*head == NULL);
    177 		RF_ASSERT(*tail == NULL);
    178 		*head = data;
    179 		*tail = data;
    180 	}
    181 	RF_ASSERT((*head)->prev == NULL);
    182 	RF_ASSERT((*tail)->next == NULL);
    183 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    184 }
    185 
    186 static RF_ParityLogData_t *
    187 DequeueParityLogData(
    188     RF_Raid_t * raidPtr,
    189     RF_ParityLogData_t ** head,
    190     RF_ParityLogData_t ** tail,
    191     int ignoreLocks)
    192 {
    193 	RF_ParityLogData_t *data;
    194 
    195 	/* Remove and return an in-core parity log from the tail of a disk
    196 	 * queue (*head, *tail). NON-BLOCKING */
    197 
    198 	/* remove from tail, preserving FIFO order */
    199 	if (!ignoreLocks)
    200 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    201 	data = *tail;
    202 	if (data) {
    203 		if (*head == *tail) {
    204 			/* removing last item from queue */
    205 			*head = NULL;
    206 			*tail = NULL;
    207 		} else {
    208 			*tail = (*tail)->prev;
    209 			(*tail)->next = NULL;
    210 			RF_ASSERT((*head)->prev == NULL);
    211 			RF_ASSERT((*tail)->next == NULL);
    212 		}
    213 		data->next = NULL;
    214 		data->prev = NULL;
    215 		if (rf_parityLogDebug)
    216 			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    217 	}
    218 	if (*head) {
    219 		RF_ASSERT((*head)->prev == NULL);
    220 		RF_ASSERT((*tail)->next == NULL);
    221 	}
    222 	if (!ignoreLocks)
    223 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    224 	return (data);
    225 }
    226 
    227 
    228 static void
    229 RequeueParityLogData(
    230     RF_ParityLogData_t * data,
    231     RF_ParityLogData_t ** head,
    232     RF_ParityLogData_t ** tail)
    233 {
    234 	RF_Raid_t *raidPtr;
    235 
    236 	/* Insert an in-core parity log (*data) into the tail of a disk queue
    237 	 * (*head, *tail). NON-BLOCKING */
    238 
    239 	raidPtr = data->common->raidPtr;
    240 	RF_ASSERT(data);
    241 	if (rf_parityLogDebug)
    242 		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
    243 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    244 	if (*tail) {
    245 		/* append to tail of list */
    246 		data->prev = *tail;
    247 		data->next = NULL;
    248 		(*tail)->next = data;
    249 		*tail = data;
    250 	} else {
    251 		/* inserting into an empty list */
    252 		*head = data;
    253 		*tail = data;
    254 		(*head)->prev = NULL;
    255 		(*tail)->next = NULL;
    256 	}
    257 	RF_ASSERT((*head)->prev == NULL);
    258 	RF_ASSERT((*tail)->next == NULL);
    259 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    260 }
    261 
    262 RF_ParityLogData_t *
    263 rf_CreateParityLogData(
    264     RF_ParityRecordType_t operation,
    265     RF_PhysDiskAddr_t * pda,
    266     caddr_t bufPtr,
    267     RF_Raid_t * raidPtr,
    268     int (*wakeFunc) (RF_DagNode_t * node, int status),
    269     void *wakeArg,
    270     RF_AccTraceEntry_t * tracerec,
    271     RF_Etimer_t startTime)
    272 {
    273 	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
    274 	RF_CommonLogData_t *common;
    275 	RF_PhysDiskAddr_t *diskAddress;
    276 	int     boundary, offset = 0;
    277 
    278 	/* Return an initialized struct of info to be logged. Build one item
    279 	 * per physical disk address, one item per region.
    280 	 *
    281 	 * NON-BLOCKING */
    282 
    283 	diskAddress = pda;
    284 	common = AllocParityLogCommonData(raidPtr);
    285 	RF_ASSERT(common);
    286 
    287 	common->operation = operation;
    288 	common->bufPtr = bufPtr;
    289 	common->raidPtr = raidPtr;
    290 	common->wakeFunc = wakeFunc;
    291 	common->wakeArg = wakeArg;
    292 	common->tracerec = tracerec;
    293 	common->startTime = startTime;
    294 	common->cnt = 0;
    295 
    296 	if (rf_parityLogDebug)
    297 		printf("[entering CreateParityLogData]\n");
    298 	while (diskAddress) {
    299 		common->cnt++;
    300 		data = AllocParityLogData(raidPtr);
    301 		RF_ASSERT(data);
    302 		data->common = common;
    303 		data->next = NULL;
    304 		data->prev = NULL;
    305 		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
    306 		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
    307 			/* disk address does not cross a region boundary */
    308 			data->diskAddress = *diskAddress;
    309 			data->bufOffset = offset;
    310 			offset = offset + diskAddress->numSector;
    311 			EnqueueParityLogData(data, &resultHead, &resultTail);
    312 			/* adjust disk address */
    313 			diskAddress = diskAddress->next;
    314 		} else {
    315 			/* disk address crosses a region boundary */
    316 			/* find address where region is crossed */
    317 			boundary = 0;
    318 			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
    319 				boundary++;
    320 
    321 			/* enter data before the boundary */
    322 			data->diskAddress = *diskAddress;
    323 			data->diskAddress.numSector = boundary;
    324 			data->bufOffset = offset;
    325 			offset += boundary;
    326 			EnqueueParityLogData(data, &resultHead, &resultTail);
    327 			/* adjust disk address */
    328 			diskAddress->startSector += boundary;
    329 			diskAddress->numSector -= boundary;
    330 		}
    331 	}
    332 	if (rf_parityLogDebug)
    333 		printf("[leaving CreateParityLogData]\n");
    334 	return (resultHead);
    335 }
    336 
    337 
    338 RF_ParityLogData_t *
    339 rf_SearchAndDequeueParityLogData(
    340     RF_Raid_t * raidPtr,
    341     int regionID,
    342     RF_ParityLogData_t ** head,
    343     RF_ParityLogData_t ** tail,
    344     int ignoreLocks)
    345 {
    346 	RF_ParityLogData_t *w;
    347 
    348 	/* Remove and return an in-core parity log from a specified region
    349 	 * (regionID). If a matching log is not found, return NULL.
    350 	 *
    351 	 * NON-BLOCKING. */
    352 
    353 	/* walk backward through a list, looking for an entry with a matching
    354 	 * region ID */
    355 	if (!ignoreLocks)
    356 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    357 	w = (*tail);
    358 	while (w) {
    359 		if (w->regionID == regionID) {
    360 			/* remove an element from the list */
    361 			if (w == *tail) {
    362 				if (*head == *tail) {
    363 					/* removing only element in the list */
    364 					*head = NULL;
    365 					*tail = NULL;
    366 				} else {
    367 					/* removing last item in the list */
    368 					*tail = (*tail)->prev;
    369 					(*tail)->next = NULL;
    370 					RF_ASSERT((*head)->prev == NULL);
    371 					RF_ASSERT((*tail)->next == NULL);
    372 				}
    373 			} else {
    374 				if (w == *head) {
    375 					/* removing first item in the list */
    376 					*head = (*head)->next;
    377 					(*head)->prev = NULL;
    378 					RF_ASSERT((*head)->prev == NULL);
    379 					RF_ASSERT((*tail)->next == NULL);
    380 				} else {
    381 					/* removing an item from the middle of
    382 					 * the list */
    383 					w->prev->next = w->next;
    384 					w->next->prev = w->prev;
    385 					RF_ASSERT((*head)->prev == NULL);
    386 					RF_ASSERT((*tail)->next == NULL);
    387 				}
    388 			}
    389 			w->prev = NULL;
    390 			w->next = NULL;
    391 			if (rf_parityLogDebug)
    392 				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
    393 			return (w);
    394 		} else
    395 			w = w->prev;
    396 	}
    397 	if (!ignoreLocks)
    398 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    399 	return (NULL);
    400 }
    401 
    402 static RF_ParityLogData_t *
    403 DequeueMatchingLogData(
    404     RF_Raid_t * raidPtr,
    405     RF_ParityLogData_t ** head,
    406     RF_ParityLogData_t ** tail)
    407 {
    408 	RF_ParityLogData_t *logDataList, *logData;
    409 	int     regionID;
    410 
    411 	/* Remove and return an in-core parity log from the tail of a disk
    412 	 * queue (*head, *tail).  Then remove all matching (identical
    413 	 * regionIDs) logData and return as a linked list.
    414 	 *
    415 	 * NON-BLOCKING */
    416 
    417 	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
    418 	if (logDataList) {
    419 		regionID = logDataList->regionID;
    420 		logData = logDataList;
    421 		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
    422 		while (logData->next) {
    423 			logData = logData->next;
    424 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
    425 		}
    426 	}
    427 	return (logDataList);
    428 }
    429 
    430 
    431 static RF_ParityLog_t *
    432 AcquireParityLog(
    433     RF_ParityLogData_t * logData,
    434     int finish)
    435 {
    436 	RF_ParityLog_t *log = NULL;
    437 	RF_Raid_t *raidPtr;
    438 
    439 	/* Grab a log buffer from the pool and return it. If no buffers are
    440 	 * available, return NULL. NON-BLOCKING */
    441 	raidPtr = logData->common->raidPtr;
    442 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    443 	if (raidPtr->parityLogPool.parityLogs) {
    444 		log = raidPtr->parityLogPool.parityLogs;
    445 		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
    446 		log->regionID = logData->regionID;
    447 		log->numRecords = 0;
    448 		log->next = NULL;
    449 		raidPtr->logsInUse++;
    450 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    451 	} else {
    452 		/* no logs available, so place ourselves on the queue of work
    453 		 * waiting on log buffers this is done while
    454 		 * parityLogPool.mutex is held, to ensure synchronization with
    455 		 * ReleaseParityLogs. */
    456 		if (rf_parityLogDebug)
    457 			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
    458 		if (finish)
    459 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    460 		else
    461 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    462 	}
    463 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    464 	return (log);
    465 }
    466 
    467 void
    468 rf_ReleaseParityLogs(
    469     RF_Raid_t * raidPtr,
    470     RF_ParityLog_t * firstLog)
    471 {
    472 	RF_ParityLogData_t *logDataList;
    473 	RF_ParityLog_t *log, *lastLog;
    474 	int     cnt;
    475 
    476 	/* Insert a linked list of parity logs (firstLog) to the free list
    477 	 * (parityLogPool.parityLogPool)
    478 	 *
    479 	 * NON-BLOCKING. */
    480 
    481 	RF_ASSERT(firstLog);
    482 
    483 	/* Before returning logs to global free list, service all requests
    484 	 * which are blocked on logs.  Holding mutexes for parityLogPool and
    485 	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
    486 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    487 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    488 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    489 	log = firstLog;
    490 	if (firstLog)
    491 		firstLog = firstLog->next;
    492 	log->numRecords = 0;
    493 	log->next = NULL;
    494 	while (logDataList && log) {
    495 		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    496 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    497 		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
    498 		if (rf_parityLogDebug)
    499 			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
    500 		if (log == NULL) {
    501 			log = firstLog;
    502 			if (firstLog) {
    503 				firstLog = firstLog->next;
    504 				log->numRecords = 0;
    505 				log->next = NULL;
    506 			}
    507 		}
    508 		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
    509 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    510 		if (log)
    511 			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
    512 	}
    513 	/* return remaining logs to pool */
    514 	if (log) {
    515 		log->next = firstLog;
    516 		firstLog = log;
    517 	}
    518 	if (firstLog) {
    519 		lastLog = firstLog;
    520 		raidPtr->logsInUse--;
    521 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    522 		while (lastLog->next) {
    523 			lastLog = lastLog->next;
    524 			raidPtr->logsInUse--;
    525 			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
    526 		}
    527 		lastLog->next = raidPtr->parityLogPool.parityLogs;
    528 		raidPtr->parityLogPool.parityLogs = firstLog;
    529 		cnt = 0;
    530 		log = raidPtr->parityLogPool.parityLogs;
    531 		while (log) {
    532 			cnt++;
    533 			log = log->next;
    534 		}
    535 		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
    536 	}
    537 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
    538 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    539 }
    540 
    541 static void
    542 ReintLog(
    543     RF_Raid_t * raidPtr,
    544     int regionID,
    545     RF_ParityLog_t * log)
    546 {
    547 	RF_ASSERT(log);
    548 
    549 	/* Insert an in-core parity log (log) into the disk queue of
    550 	 * reintegration work.  Set the flag (reintInProgress) for the
    551 	 * specified region (regionID) to indicate that reintegration is in
    552 	 * progress for this region. NON-BLOCKING */
    553 
    554 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    555 	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
    556 									 * complete */
    557 
    558 	if (rf_parityLogDebug)
    559 		printf("[requesting reintegration of region %d]\n", log->regionID);
    560 	/* move record to reintegration queue */
    561 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    562 	log->next = raidPtr->parityLogDiskQueue.reintQueue;
    563 	raidPtr->parityLogDiskQueue.reintQueue = log;
    564 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    565 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    566 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    567 }
    568 
    569 static void
    570 FlushLog(
    571     RF_Raid_t * raidPtr,
    572     RF_ParityLog_t * log)
    573 {
    574 	/* insert a core log (log) into a list of logs
    575 	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
    576 	 * NON-BLOCKING */
    577 
    578 	RF_ASSERT(log);
    579 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
    580 	RF_ASSERT(log->next == NULL);
    581 	/* move log to flush queue */
    582 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    583 	log->next = raidPtr->parityLogDiskQueue.flushQueue;
    584 	raidPtr->parityLogDiskQueue.flushQueue = log;
    585 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    586 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
    587 }
    588 
    589 static int
    590 DumpParityLogToDisk(
    591     int finish,
    592     RF_ParityLogData_t * logData)
    593 {
    594 	int     i, diskCount, regionID = logData->regionID;
    595 	RF_ParityLog_t *log;
    596 	RF_Raid_t *raidPtr;
    597 
    598 	raidPtr = logData->common->raidPtr;
    599 
    600 	/* Move a core log to disk.  If the log disk is full, initiate
    601 	 * reintegration.
    602 	 *
    603 	 * Return (0) if we can enqueue the dump immediately, otherwise return
    604 	 * (1) to indicate we are blocked on reintegration and control of the
    605 	 * thread should be relinquished.
    606 	 *
    607 	 * Caller must hold regionInfo[regionID].mutex
    608 	 *
    609 	 * NON-BLOCKING */
    610 
    611 	if (rf_parityLogDebug)
    612 		printf("[dumping parity log to disk, region %d]\n", regionID);
    613 	log = raidPtr->regionInfo[regionID].coreLog;
    614 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
    615 	RF_ASSERT(log->next == NULL);
    616 
    617 	/* if reintegration is in progress, must queue work */
    618 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    619 	if (raidPtr->regionInfo[regionID].reintInProgress) {
    620 		/* Can not proceed since this region is currently being
    621 		 * reintegrated. We can not block, so queue remaining work and
    622 		 * return */
    623 		if (rf_parityLogDebug)
    624 			printf("[region %d waiting on reintegration]\n", regionID);
    625 		/* XXX not sure about the use of finish - shouldn't this
    626 		 * always be "Enqueue"? */
    627 		if (finish)
    628 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
    629 		else
    630 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
    631 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    632 		return (1);	/* relenquish control of this thread */
    633 	}
    634 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    635 	raidPtr->regionInfo[regionID].coreLog = NULL;
    636 	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
    637 		/* IMPORTANT!! this loop bound assumes region disk holds an
    638 		 * integral number of core logs */
    639 	{
    640 		/* update disk map for this region */
    641 		diskCount = raidPtr->regionInfo[regionID].diskCount;
    642 		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
    643 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
    644 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
    645 		}
    646 		log->diskOffset = diskCount;
    647 		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
    648 		FlushLog(raidPtr, log);
    649 	} else {
    650 		/* no room for log on disk, send it to disk manager and
    651 		 * request reintegration */
    652 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
    653 		ReintLog(raidPtr, regionID, log);
    654 	}
    655 	if (rf_parityLogDebug)
    656 		printf("[finished dumping parity log to disk, region %d]\n", regionID);
    657 	return (0);
    658 }
    659 
    660 int
    661 rf_ParityLogAppend(
    662     RF_ParityLogData_t * logData,
    663     int finish,
    664     RF_ParityLog_t ** incomingLog,
    665     int clearReintFlag)
    666 {
    667 	int     regionID, logItem, itemDone;
    668 	RF_ParityLogData_t *item;
    669 	int     punt, done = RF_FALSE;
    670 	RF_ParityLog_t *log;
    671 	RF_Raid_t *raidPtr;
    672 	RF_Etimer_t timer;
    673 	int     (*wakeFunc) (RF_DagNode_t * node, int status);
    674 	void   *wakeArg;
    675 
    676 	/* Add parity to the appropriate log, one sector at a time. This
    677 	 * routine is called is called by dag functions ParityLogUpdateFunc
    678 	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
    679 	 *
    680 	 * Parity to be logged is contained in a linked-list (logData).  When
    681 	 * this routine returns, every sector in the list will be in one of
    682 	 * three places: 1) entered into the parity log 2) queued, waiting on
    683 	 * reintegration 3) queued, waiting on a core log
    684 	 *
    685 	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
    686 	 * Later, as conditions which required the block are removed, the work
    687 	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
    688 	 *
    689 	 * NON-BLOCKING */
    690 
    691 	raidPtr = logData->common->raidPtr;
    692 	/* lock the region for the first item in logData */
    693 	RF_ASSERT(logData != NULL);
    694 	regionID = logData->regionID;
    695 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    696 	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
    697 
    698 	if (clearReintFlag) {
    699 		/* Enable flushing for this region.  Holding both locks
    700 		 * provides a synchronization barrier with DumpParityLogToDisk */
    701 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
    702 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    703 		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
    704 		raidPtr->regionInfo[regionID].diskCount = 0;
    705 		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
    706 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
    707 										 * enabled */
    708 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    709 	}
    710 	/* process each item in logData */
    711 	while (logData) {
    712 		/* remove an item from logData */
    713 		item = logData;
    714 		logData = logData->next;
    715 		item->next = NULL;
    716 		item->prev = NULL;
    717 
    718 		if (rf_parityLogDebug)
    719 			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
    720 
    721 		/* see if we moved to a new region */
    722 		if (regionID != item->regionID) {
    723 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    724 			regionID = item->regionID;
    725 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    726 			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
    727 		}
    728 		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
    729 				 * can happen in one of two ways: 1) no core
    730 				 * log (AcquireParityLog) 2) waiting on
    731 				 * reintegration (DumpParityLogToDisk) If punt
    732 				 * is RF_TRUE, the dataItem was queued, so
    733 				 * skip to next item. */
    734 
    735 		/* process item, one sector at a time, until all sectors
    736 		 * processed or we punt */
    737 		if (item->diskAddress.numSector > 0)
    738 			done = RF_FALSE;
    739 		else
    740 			RF_ASSERT(0);
    741 		while (!punt && !done) {
    742 			/* verify that a core log exists for this region */
    743 			if (!raidPtr->regionInfo[regionID].coreLog) {
    744 				/* Attempt to acquire a parity log. If
    745 				 * acquisition fails, queue remaining work in
    746 				 * data item and move to nextItem. */
    747 				if (incomingLog)
    748 					if (*incomingLog) {
    749 						RF_ASSERT((*incomingLog)->next == NULL);
    750 						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
    751 						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
    752 						*incomingLog = NULL;
    753 					} else
    754 						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    755 				else
    756 					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    757 				/* Note: AcquireParityLog either returns a log
    758 				 * or enqueues currentItem */
    759 			}
    760 			if (!raidPtr->regionInfo[regionID].coreLog)
    761 				punt = RF_TRUE;	/* failed to find a core log */
    762 			else {
    763 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
    764 				/* verify that the log has room for new
    765 				 * entries */
    766 				/* if log is full, dump it to disk and grab a
    767 				 * new log */
    768 				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
    769 					/* log is full, dump it to disk */
    770 					if (DumpParityLogToDisk(finish, item))
    771 						punt = RF_TRUE;	/* dump unsuccessful,
    772 								 * blocked on
    773 								 * reintegration */
    774 					else {
    775 						/* dump was successful */
    776 						if (incomingLog)
    777 							if (*incomingLog) {
    778 								RF_ASSERT((*incomingLog)->next == NULL);
    779 								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
    780 								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
    781 								*incomingLog = NULL;
    782 							} else
    783 								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    784 						else
    785 							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
    786 						/* if a core log is not
    787 						 * available, must queue work
    788 						 * and return */
    789 						if (!raidPtr->regionInfo[regionID].coreLog)
    790 							punt = RF_TRUE;	/* blocked on log
    791 									 * availability */
    792 					}
    793 				}
    794 			}
    795 			/* if we didn't punt on this item, attempt to add a
    796 			 * sector to the core log */
    797 			if (!punt) {
    798 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
    799 				/* at this point, we have a core log with
    800 				 * enough room for a sector */
    801 				/* copy a sector into the log */
    802 				log = raidPtr->regionInfo[regionID].coreLog;
    803 				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
    804 				logItem = log->numRecords++;
    805 				log->records[logItem].parityAddr = item->diskAddress;
    806 				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
    807 				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
    808 				log->records[logItem].parityAddr.numSector = 1;
    809 				log->records[logItem].operation = item->common->operation;
    810 				bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
    811 				item->diskAddress.numSector--;
    812 				item->diskAddress.startSector++;
    813 				if (item->diskAddress.numSector == 0)
    814 					done = RF_TRUE;
    815 			}
    816 		}
    817 
    818 		if (!punt) {
    819 			/* Processed this item completely, decrement count of
    820 			 * items to be processed. */
    821 			RF_ASSERT(item->diskAddress.numSector == 0);
    822 			RF_LOCK_MUTEX(item->common->mutex);
    823 			item->common->cnt--;
    824 			if (item->common->cnt == 0)
    825 				itemDone = RF_TRUE;
    826 			else
    827 				itemDone = RF_FALSE;
    828 			RF_UNLOCK_MUTEX(item->common->mutex);
    829 			if (itemDone) {
    830 				/* Finished processing all log data for this
    831 				 * IO Return structs to free list and invoke
    832 				 * wakeup function. */
    833 				timer = item->common->startTime;	/* grab initial value of
    834 									 * timer */
    835 				RF_ETIMER_STOP(timer);
    836 				RF_ETIMER_EVAL(timer);
    837 				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
    838 				if (rf_parityLogDebug)
    839 					printf("[waking process for region %d]\n", item->regionID);
    840 				wakeFunc = item->common->wakeFunc;
    841 				wakeArg = item->common->wakeArg;
    842 				FreeParityLogCommonData(item->common);
    843 				FreeParityLogData(item);
    844 				(wakeFunc) (wakeArg, 0);
    845 			} else
    846 				FreeParityLogData(item);
    847 		}
    848 	}
    849 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    850 	if (rf_parityLogDebug)
    851 		printf("[exiting ParityLogAppend]\n");
    852 	return (0);
    853 }
    854 
    855 
    856 void
    857 rf_EnableParityLogging(RF_Raid_t * raidPtr)
    858 {
    859 	int     regionID;
    860 
    861 	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
    862 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    863 		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
    864 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
    865 	}
    866 	if (rf_parityLogDebug)
    867 		printf("[parity logging enabled]\n");
    868 }
    869 #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
    870