rf_paritylog.c revision 1.3 1 /* $NetBSD: rf_paritylog.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* Code for manipulating in-core parity logs
30 *
31 */
32
33 #include "rf_archs.h"
34
35 #if RF_INCLUDE_PARITYLOGGING > 0
36
37 /*
38 * Append-only log for recording parity "update" and "overwrite" records
39 */
40
41 #include "rf_types.h"
42 #include "rf_threadstuff.h"
43 #include "rf_mcpair.h"
44 #include "rf_raid.h"
45 #include "rf_dag.h"
46 #include "rf_dagfuncs.h"
47 #include "rf_desc.h"
48 #include "rf_layout.h"
49 #include "rf_diskqueue.h"
50 #include "rf_etimer.h"
51 #include "rf_paritylog.h"
52 #include "rf_general.h"
53 #include "rf_threadid.h"
54 #include "rf_map.h"
55 #include "rf_paritylogging.h"
56 #include "rf_paritylogDiskMgr.h"
57 #include "rf_sys.h"
58
59 static RF_CommonLogData_t *
60 AllocParityLogCommonData(RF_Raid_t * raidPtr)
61 {
62 RF_CommonLogData_t *common = NULL;
63 int rc;
64
65 /* Return a struct for holding common parity log information from the
66 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
67 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
68
69 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
70 if (raidPtr->parityLogDiskQueue.freeCommonList) {
71 common = raidPtr->parityLogDiskQueue.freeCommonList;
72 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
73 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
74 } else {
75 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
76 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
77 rc = rf_mutex_init(&common->mutex);
78 if (rc) {
79 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
80 __LINE__, rc);
81 RF_Free(common, sizeof(RF_CommonLogData_t));
82 common = NULL;
83 }
84 }
85 common->next = NULL;
86 return (common);
87 }
88
89 static void
90 FreeParityLogCommonData(RF_CommonLogData_t * common)
91 {
92 RF_Raid_t *raidPtr;
93
94 /* Insert a single struct for holding parity log information (data)
95 * into the free list (rf_parityLogDiskQueue.freeCommonList).
96 * NON-BLOCKING */
97
98 raidPtr = common->raidPtr;
99 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
100 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
101 raidPtr->parityLogDiskQueue.freeCommonList = common;
102 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
103 }
104
105 static RF_ParityLogData_t *
106 AllocParityLogData(RF_Raid_t * raidPtr)
107 {
108 RF_ParityLogData_t *data = NULL;
109
110 /* Return a struct for holding parity log information from the free
111 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
112 * call RF_Malloc to create a new structure. NON-BLOCKING */
113
114 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
115 if (raidPtr->parityLogDiskQueue.freeDataList) {
116 data = raidPtr->parityLogDiskQueue.freeDataList;
117 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
118 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
119 } else {
120 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
121 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
122 }
123 data->next = NULL;
124 data->prev = NULL;
125 return (data);
126 }
127
128
129 static void
130 FreeParityLogData(RF_ParityLogData_t * data)
131 {
132 RF_ParityLogData_t *nextItem;
133 RF_Raid_t *raidPtr;
134
135 /* Insert a linked list of structs for holding parity log information
136 * (data) into the free list (parityLogDiskQueue.freeList).
137 * NON-BLOCKING */
138
139 raidPtr = data->common->raidPtr;
140 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
141 while (data) {
142 nextItem = data->next;
143 data->next = raidPtr->parityLogDiskQueue.freeDataList;
144 raidPtr->parityLogDiskQueue.freeDataList = data;
145 data = nextItem;
146 }
147 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
148 }
149
150
151 static void
152 EnqueueParityLogData(
153 RF_ParityLogData_t * data,
154 RF_ParityLogData_t ** head,
155 RF_ParityLogData_t ** tail)
156 {
157 RF_Raid_t *raidPtr;
158
159 /* Insert an in-core parity log (*data) into the head of a disk queue
160 * (*head, *tail). NON-BLOCKING */
161
162 raidPtr = data->common->raidPtr;
163 if (rf_parityLogDebug)
164 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
165 RF_ASSERT(data->prev == NULL);
166 RF_ASSERT(data->next == NULL);
167 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
168 if (*head) {
169 /* insert into head of queue */
170 RF_ASSERT((*head)->prev == NULL);
171 RF_ASSERT((*tail)->next == NULL);
172 data->next = *head;
173 (*head)->prev = data;
174 *head = data;
175 } else {
176 /* insert into empty list */
177 RF_ASSERT(*head == NULL);
178 RF_ASSERT(*tail == NULL);
179 *head = data;
180 *tail = data;
181 }
182 RF_ASSERT((*head)->prev == NULL);
183 RF_ASSERT((*tail)->next == NULL);
184 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
185 }
186
187 static RF_ParityLogData_t *
188 DequeueParityLogData(
189 RF_Raid_t * raidPtr,
190 RF_ParityLogData_t ** head,
191 RF_ParityLogData_t ** tail,
192 int ignoreLocks)
193 {
194 RF_ParityLogData_t *data;
195
196 /* Remove and return an in-core parity log from the tail of a disk
197 * queue (*head, *tail). NON-BLOCKING */
198
199 /* remove from tail, preserving FIFO order */
200 if (!ignoreLocks)
201 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
202 data = *tail;
203 if (data) {
204 if (*head == *tail) {
205 /* removing last item from queue */
206 *head = NULL;
207 *tail = NULL;
208 } else {
209 *tail = (*tail)->prev;
210 (*tail)->next = NULL;
211 RF_ASSERT((*head)->prev == NULL);
212 RF_ASSERT((*tail)->next == NULL);
213 }
214 data->next = NULL;
215 data->prev = NULL;
216 if (rf_parityLogDebug)
217 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
218 }
219 if (*head) {
220 RF_ASSERT((*head)->prev == NULL);
221 RF_ASSERT((*tail)->next == NULL);
222 }
223 if (!ignoreLocks)
224 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
225 return (data);
226 }
227
228
229 static void
230 RequeueParityLogData(
231 RF_ParityLogData_t * data,
232 RF_ParityLogData_t ** head,
233 RF_ParityLogData_t ** tail)
234 {
235 RF_Raid_t *raidPtr;
236
237 /* Insert an in-core parity log (*data) into the tail of a disk queue
238 * (*head, *tail). NON-BLOCKING */
239
240 raidPtr = data->common->raidPtr;
241 RF_ASSERT(data);
242 if (rf_parityLogDebug)
243 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
244 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
245 if (*tail) {
246 /* append to tail of list */
247 data->prev = *tail;
248 data->next = NULL;
249 (*tail)->next = data;
250 *tail = data;
251 } else {
252 /* inserting into an empty list */
253 *head = data;
254 *tail = data;
255 (*head)->prev = NULL;
256 (*tail)->next = NULL;
257 }
258 RF_ASSERT((*head)->prev == NULL);
259 RF_ASSERT((*tail)->next == NULL);
260 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
261 }
262
263 RF_ParityLogData_t *
264 rf_CreateParityLogData(
265 RF_ParityRecordType_t operation,
266 RF_PhysDiskAddr_t * pda,
267 caddr_t bufPtr,
268 RF_Raid_t * raidPtr,
269 int (*wakeFunc) (RF_DagNode_t * node, int status),
270 void *wakeArg,
271 RF_AccTraceEntry_t * tracerec,
272 RF_Etimer_t startTime)
273 {
274 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
275 RF_CommonLogData_t *common;
276 RF_PhysDiskAddr_t *diskAddress;
277 int boundary, offset = 0;
278
279 /* Return an initialized struct of info to be logged. Build one item
280 * per physical disk address, one item per region.
281 *
282 * NON-BLOCKING */
283
284 diskAddress = pda;
285 common = AllocParityLogCommonData(raidPtr);
286 RF_ASSERT(common);
287
288 common->operation = operation;
289 common->bufPtr = bufPtr;
290 common->raidPtr = raidPtr;
291 common->wakeFunc = wakeFunc;
292 common->wakeArg = wakeArg;
293 common->tracerec = tracerec;
294 common->startTime = startTime;
295 common->cnt = 0;
296
297 if (rf_parityLogDebug)
298 printf("[entering CreateParityLogData]\n");
299 while (diskAddress) {
300 common->cnt++;
301 data = AllocParityLogData(raidPtr);
302 RF_ASSERT(data);
303 data->common = common;
304 data->next = NULL;
305 data->prev = NULL;
306 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
307 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
308 /* disk address does not cross a region boundary */
309 data->diskAddress = *diskAddress;
310 data->bufOffset = offset;
311 offset = offset + diskAddress->numSector;
312 EnqueueParityLogData(data, &resultHead, &resultTail);
313 /* adjust disk address */
314 diskAddress = diskAddress->next;
315 } else {
316 /* disk address crosses a region boundary */
317 /* find address where region is crossed */
318 boundary = 0;
319 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
320 boundary++;
321
322 /* enter data before the boundary */
323 data->diskAddress = *diskAddress;
324 data->diskAddress.numSector = boundary;
325 data->bufOffset = offset;
326 offset += boundary;
327 EnqueueParityLogData(data, &resultHead, &resultTail);
328 /* adjust disk address */
329 diskAddress->startSector += boundary;
330 diskAddress->numSector -= boundary;
331 }
332 }
333 if (rf_parityLogDebug)
334 printf("[leaving CreateParityLogData]\n");
335 return (resultHead);
336 }
337
338
339 RF_ParityLogData_t *
340 rf_SearchAndDequeueParityLogData(
341 RF_Raid_t * raidPtr,
342 int regionID,
343 RF_ParityLogData_t ** head,
344 RF_ParityLogData_t ** tail,
345 int ignoreLocks)
346 {
347 RF_ParityLogData_t *w;
348
349 /* Remove and return an in-core parity log from a specified region
350 * (regionID). If a matching log is not found, return NULL.
351 *
352 * NON-BLOCKING. */
353
354 /* walk backward through a list, looking for an entry with a matching
355 * region ID */
356 if (!ignoreLocks)
357 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
358 w = (*tail);
359 while (w) {
360 if (w->regionID == regionID) {
361 /* remove an element from the list */
362 if (w == *tail) {
363 if (*head == *tail) {
364 /* removing only element in the list */
365 *head = NULL;
366 *tail = NULL;
367 } else {
368 /* removing last item in the list */
369 *tail = (*tail)->prev;
370 (*tail)->next = NULL;
371 RF_ASSERT((*head)->prev == NULL);
372 RF_ASSERT((*tail)->next == NULL);
373 }
374 } else {
375 if (w == *head) {
376 /* removing first item in the list */
377 *head = (*head)->next;
378 (*head)->prev = NULL;
379 RF_ASSERT((*head)->prev == NULL);
380 RF_ASSERT((*tail)->next == NULL);
381 } else {
382 /* removing an item from the middle of
383 * the list */
384 w->prev->next = w->next;
385 w->next->prev = w->prev;
386 RF_ASSERT((*head)->prev == NULL);
387 RF_ASSERT((*tail)->next == NULL);
388 }
389 }
390 w->prev = NULL;
391 w->next = NULL;
392 if (rf_parityLogDebug)
393 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
394 return (w);
395 } else
396 w = w->prev;
397 }
398 if (!ignoreLocks)
399 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
400 return (NULL);
401 }
402
403 static RF_ParityLogData_t *
404 DequeueMatchingLogData(
405 RF_Raid_t * raidPtr,
406 RF_ParityLogData_t ** head,
407 RF_ParityLogData_t ** tail)
408 {
409 RF_ParityLogData_t *logDataList, *logData;
410 int regionID;
411
412 /* Remove and return an in-core parity log from the tail of a disk
413 * queue (*head, *tail). Then remove all matching (identical
414 * regionIDs) logData and return as a linked list.
415 *
416 * NON-BLOCKING */
417
418 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
419 if (logDataList) {
420 regionID = logDataList->regionID;
421 logData = logDataList;
422 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
423 while (logData->next) {
424 logData = logData->next;
425 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
426 }
427 }
428 return (logDataList);
429 }
430
431
432 static RF_ParityLog_t *
433 AcquireParityLog(
434 RF_ParityLogData_t * logData,
435 int finish)
436 {
437 RF_ParityLog_t *log = NULL;
438 RF_Raid_t *raidPtr;
439
440 /* Grab a log buffer from the pool and return it. If no buffers are
441 * available, return NULL. NON-BLOCKING */
442 raidPtr = logData->common->raidPtr;
443 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
444 if (raidPtr->parityLogPool.parityLogs) {
445 log = raidPtr->parityLogPool.parityLogs;
446 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
447 log->regionID = logData->regionID;
448 log->numRecords = 0;
449 log->next = NULL;
450 raidPtr->logsInUse++;
451 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
452 } else {
453 /* no logs available, so place ourselves on the queue of work
454 * waiting on log buffers this is done while
455 * parityLogPool.mutex is held, to ensure synchronization with
456 * ReleaseParityLogs. */
457 if (rf_parityLogDebug)
458 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
459 if (finish)
460 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
461 else
462 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
463 }
464 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
465 return (log);
466 }
467
468 void
469 rf_ReleaseParityLogs(
470 RF_Raid_t * raidPtr,
471 RF_ParityLog_t * firstLog)
472 {
473 RF_ParityLogData_t *logDataList;
474 RF_ParityLog_t *log, *lastLog;
475 int cnt;
476
477 /* Insert a linked list of parity logs (firstLog) to the free list
478 * (parityLogPool.parityLogPool)
479 *
480 * NON-BLOCKING. */
481
482 RF_ASSERT(firstLog);
483
484 /* Before returning logs to global free list, service all requests
485 * which are blocked on logs. Holding mutexes for parityLogPool and
486 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
487 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
488 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
489 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
490 log = firstLog;
491 if (firstLog)
492 firstLog = firstLog->next;
493 log->numRecords = 0;
494 log->next = NULL;
495 while (logDataList && log) {
496 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
497 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
498 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
499 if (rf_parityLogDebug)
500 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
501 if (log == NULL) {
502 log = firstLog;
503 if (firstLog) {
504 firstLog = firstLog->next;
505 log->numRecords = 0;
506 log->next = NULL;
507 }
508 }
509 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
510 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
511 if (log)
512 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
513 }
514 /* return remaining logs to pool */
515 if (log) {
516 log->next = firstLog;
517 firstLog = log;
518 }
519 if (firstLog) {
520 lastLog = firstLog;
521 raidPtr->logsInUse--;
522 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
523 while (lastLog->next) {
524 lastLog = lastLog->next;
525 raidPtr->logsInUse--;
526 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
527 }
528 lastLog->next = raidPtr->parityLogPool.parityLogs;
529 raidPtr->parityLogPool.parityLogs = firstLog;
530 cnt = 0;
531 log = raidPtr->parityLogPool.parityLogs;
532 while (log) {
533 cnt++;
534 log = log->next;
535 }
536 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
537 }
538 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
539 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
540 }
541
542 static void
543 ReintLog(
544 RF_Raid_t * raidPtr,
545 int regionID,
546 RF_ParityLog_t * log)
547 {
548 RF_ASSERT(log);
549
550 /* Insert an in-core parity log (log) into the disk queue of
551 * reintegration work. Set the flag (reintInProgress) for the
552 * specified region (regionID) to indicate that reintegration is in
553 * progress for this region. NON-BLOCKING */
554
555 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
556 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint
557 * complete */
558
559 if (rf_parityLogDebug)
560 printf("[requesting reintegration of region %d]\n", log->regionID);
561 /* move record to reintegration queue */
562 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
563 log->next = raidPtr->parityLogDiskQueue.reintQueue;
564 raidPtr->parityLogDiskQueue.reintQueue = log;
565 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
566 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
567 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
568 }
569
570 static void
571 FlushLog(
572 RF_Raid_t * raidPtr,
573 RF_ParityLog_t * log)
574 {
575 /* insert a core log (log) into a list of logs
576 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
577 * NON-BLOCKING */
578
579 RF_ASSERT(log);
580 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
581 RF_ASSERT(log->next == NULL);
582 /* move log to flush queue */
583 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
584 log->next = raidPtr->parityLogDiskQueue.flushQueue;
585 raidPtr->parityLogDiskQueue.flushQueue = log;
586 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
587 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
588 }
589
590 static int
591 DumpParityLogToDisk(
592 int finish,
593 RF_ParityLogData_t * logData)
594 {
595 int i, diskCount, regionID = logData->regionID;
596 RF_ParityLog_t *log;
597 RF_Raid_t *raidPtr;
598
599 raidPtr = logData->common->raidPtr;
600
601 /* Move a core log to disk. If the log disk is full, initiate
602 * reintegration.
603 *
604 * Return (0) if we can enqueue the dump immediately, otherwise return
605 * (1) to indicate we are blocked on reintegration and control of the
606 * thread should be relinquished.
607 *
608 * Caller must hold regionInfo[regionID].mutex
609 *
610 * NON-BLOCKING */
611
612 if (rf_parityLogDebug)
613 printf("[dumping parity log to disk, region %d]\n", regionID);
614 log = raidPtr->regionInfo[regionID].coreLog;
615 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
616 RF_ASSERT(log->next == NULL);
617
618 /* if reintegration is in progress, must queue work */
619 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
620 if (raidPtr->regionInfo[regionID].reintInProgress) {
621 /* Can not proceed since this region is currently being
622 * reintegrated. We can not block, so queue remaining work and
623 * return */
624 if (rf_parityLogDebug)
625 printf("[region %d waiting on reintegration]\n", regionID);
626 /* XXX not sure about the use of finish - shouldn't this
627 * always be "Enqueue"? */
628 if (finish)
629 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
630 else
631 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
632 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
633 return (1); /* relenquish control of this thread */
634 }
635 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
636 raidPtr->regionInfo[regionID].coreLog = NULL;
637 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
638 /* IMPORTANT!! this loop bound assumes region disk holds an
639 * integral number of core logs */
640 {
641 /* update disk map for this region */
642 diskCount = raidPtr->regionInfo[regionID].diskCount;
643 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
644 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
645 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
646 }
647 log->diskOffset = diskCount;
648 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
649 FlushLog(raidPtr, log);
650 } else {
651 /* no room for log on disk, send it to disk manager and
652 * request reintegration */
653 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
654 ReintLog(raidPtr, regionID, log);
655 }
656 if (rf_parityLogDebug)
657 printf("[finished dumping parity log to disk, region %d]\n", regionID);
658 return (0);
659 }
660
661 int
662 rf_ParityLogAppend(
663 RF_ParityLogData_t * logData,
664 int finish,
665 RF_ParityLog_t ** incomingLog,
666 int clearReintFlag)
667 {
668 int regionID, logItem, itemDone;
669 RF_ParityLogData_t *item;
670 int punt, done = RF_FALSE;
671 RF_ParityLog_t *log;
672 RF_Raid_t *raidPtr;
673 RF_Etimer_t timer;
674 int (*wakeFunc) (RF_DagNode_t * node, int status);
675 void *wakeArg;
676
677 /* Add parity to the appropriate log, one sector at a time. This
678 * routine is called is called by dag functions ParityLogUpdateFunc
679 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
680 *
681 * Parity to be logged is contained in a linked-list (logData). When
682 * this routine returns, every sector in the list will be in one of
683 * three places: 1) entered into the parity log 2) queued, waiting on
684 * reintegration 3) queued, waiting on a core log
685 *
686 * Blocked work is passed to the ParityLoggingDiskManager for completion.
687 * Later, as conditions which required the block are removed, the work
688 * reenters this routine with the "finish" parameter set to "RF_TRUE."
689 *
690 * NON-BLOCKING */
691
692 raidPtr = logData->common->raidPtr;
693 /* lock the region for the first item in logData */
694 RF_ASSERT(logData != NULL);
695 regionID = logData->regionID;
696 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
697 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
698
699 if (clearReintFlag) {
700 /* Enable flushing for this region. Holding both locks
701 * provides a synchronization barrier with DumpParityLogToDisk */
702 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
703 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
704 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
705 raidPtr->regionInfo[regionID].diskCount = 0;
706 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
707 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
708 * enabled */
709 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
710 }
711 /* process each item in logData */
712 while (logData) {
713 /* remove an item from logData */
714 item = logData;
715 logData = logData->next;
716 item->next = NULL;
717 item->prev = NULL;
718
719 if (rf_parityLogDebug)
720 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
721
722 /* see if we moved to a new region */
723 if (regionID != item->regionID) {
724 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
725 regionID = item->regionID;
726 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
727 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
728 }
729 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This
730 * can happen in one of two ways: 1) no core
731 * log (AcquireParityLog) 2) waiting on
732 * reintegration (DumpParityLogToDisk) If punt
733 * is RF_TRUE, the dataItem was queued, so
734 * skip to next item. */
735
736 /* process item, one sector at a time, until all sectors
737 * processed or we punt */
738 if (item->diskAddress.numSector > 0)
739 done = RF_FALSE;
740 else
741 RF_ASSERT(0);
742 while (!punt && !done) {
743 /* verify that a core log exists for this region */
744 if (!raidPtr->regionInfo[regionID].coreLog) {
745 /* Attempt to acquire a parity log. If
746 * acquisition fails, queue remaining work in
747 * data item and move to nextItem. */
748 if (incomingLog)
749 if (*incomingLog) {
750 RF_ASSERT((*incomingLog)->next == NULL);
751 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
752 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
753 *incomingLog = NULL;
754 } else
755 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
756 else
757 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
758 /* Note: AcquireParityLog either returns a log
759 * or enqueues currentItem */
760 }
761 if (!raidPtr->regionInfo[regionID].coreLog)
762 punt = RF_TRUE; /* failed to find a core log */
763 else {
764 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
765 /* verify that the log has room for new
766 * entries */
767 /* if log is full, dump it to disk and grab a
768 * new log */
769 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
770 /* log is full, dump it to disk */
771 if (DumpParityLogToDisk(finish, item))
772 punt = RF_TRUE; /* dump unsuccessful,
773 * blocked on
774 * reintegration */
775 else {
776 /* dump was successful */
777 if (incomingLog)
778 if (*incomingLog) {
779 RF_ASSERT((*incomingLog)->next == NULL);
780 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
781 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
782 *incomingLog = NULL;
783 } else
784 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
785 else
786 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
787 /* if a core log is not
788 * available, must queue work
789 * and return */
790 if (!raidPtr->regionInfo[regionID].coreLog)
791 punt = RF_TRUE; /* blocked on log
792 * availability */
793 }
794 }
795 }
796 /* if we didn't punt on this item, attempt to add a
797 * sector to the core log */
798 if (!punt) {
799 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
800 /* at this point, we have a core log with
801 * enough room for a sector */
802 /* copy a sector into the log */
803 log = raidPtr->regionInfo[regionID].coreLog;
804 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
805 logItem = log->numRecords++;
806 log->records[logItem].parityAddr = item->diskAddress;
807 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
808 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
809 log->records[logItem].parityAddr.numSector = 1;
810 log->records[logItem].operation = item->common->operation;
811 bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
812 item->diskAddress.numSector--;
813 item->diskAddress.startSector++;
814 if (item->diskAddress.numSector == 0)
815 done = RF_TRUE;
816 }
817 }
818
819 if (!punt) {
820 /* Processed this item completely, decrement count of
821 * items to be processed. */
822 RF_ASSERT(item->diskAddress.numSector == 0);
823 RF_LOCK_MUTEX(item->common->mutex);
824 item->common->cnt--;
825 if (item->common->cnt == 0)
826 itemDone = RF_TRUE;
827 else
828 itemDone = RF_FALSE;
829 RF_UNLOCK_MUTEX(item->common->mutex);
830 if (itemDone) {
831 /* Finished processing all log data for this
832 * IO Return structs to free list and invoke
833 * wakeup function. */
834 timer = item->common->startTime; /* grab initial value of
835 * timer */
836 RF_ETIMER_STOP(timer);
837 RF_ETIMER_EVAL(timer);
838 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
839 if (rf_parityLogDebug)
840 printf("[waking process for region %d]\n", item->regionID);
841 wakeFunc = item->common->wakeFunc;
842 wakeArg = item->common->wakeArg;
843 FreeParityLogCommonData(item->common);
844 FreeParityLogData(item);
845 (wakeFunc) (wakeArg, 0);
846 } else
847 FreeParityLogData(item);
848 }
849 }
850 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
851 if (rf_parityLogDebug)
852 printf("[exiting ParityLogAppend]\n");
853 return (0);
854 }
855
856
857 void
858 rf_EnableParityLogging(RF_Raid_t * raidPtr)
859 {
860 int regionID;
861
862 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
863 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
864 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
865 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
866 }
867 if (rf_parityLogDebug)
868 printf("[parity logging enabled]\n");
869 }
870 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
871