rf_paritylog.c revision 1.2 1 /* $NetBSD: rf_paritylog.c,v 1.2 1999/01/26 02:33:59 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* Code for manipulating in-core parity logs
30 *
31 */
32
33 #include "rf_archs.h"
34
35 #if RF_INCLUDE_PARITYLOGGING > 0
36
37 /*
38 * Append-only log for recording parity "update" and "overwrite" records
39 */
40
41 #include "rf_types.h"
42 #include "rf_threadstuff.h"
43 #include "rf_mcpair.h"
44 #include "rf_raid.h"
45 #include "rf_dag.h"
46 #include "rf_dagfuncs.h"
47 #include "rf_desc.h"
48 #include "rf_layout.h"
49 #include "rf_diskqueue.h"
50 #include "rf_etimer.h"
51 #include "rf_paritylog.h"
52 #include "rf_general.h"
53 #include "rf_threadid.h"
54 #include "rf_map.h"
55 #include "rf_paritylogging.h"
56 #include "rf_paritylogDiskMgr.h"
57 #include "rf_sys.h"
58
59 static RF_CommonLogData_t *AllocParityLogCommonData(RF_Raid_t *raidPtr)
60 {
61 RF_CommonLogData_t *common = NULL;
62 int rc;
63
64 /* Return a struct for holding common parity log information from the free
65 list (rf_parityLogDiskQueue.freeCommonList). If the free list is empty, call
66 RF_Malloc to create a new structure.
67 NON-BLOCKING */
68
69 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
70 if (raidPtr->parityLogDiskQueue.freeCommonList)
71 {
72 common = raidPtr->parityLogDiskQueue.freeCommonList;
73 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
74 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
75 }
76 else
77 {
78 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
79 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
80 rc = rf_mutex_init(&common->mutex);
81 if (rc) {
82 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
83 __LINE__, rc);
84 RF_Free(common, sizeof(RF_CommonLogData_t));
85 common = NULL;
86 }
87 }
88 common->next = NULL;
89 return(common);
90 }
91
92 static void FreeParityLogCommonData(RF_CommonLogData_t *common)
93 {
94 RF_Raid_t *raidPtr;
95
96 /* Insert a single struct for holding parity log information
97 (data) into the free list (rf_parityLogDiskQueue.freeCommonList).
98 NON-BLOCKING */
99
100 raidPtr = common->raidPtr;
101 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
102 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
103 raidPtr->parityLogDiskQueue.freeCommonList = common;
104 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
105 }
106
107 static RF_ParityLogData_t *AllocParityLogData(RF_Raid_t *raidPtr)
108 {
109 RF_ParityLogData_t *data = NULL;
110
111 /* Return a struct for holding parity log information from the free
112 list (rf_parityLogDiskQueue.freeList). If the free list is empty, call
113 RF_Malloc to create a new structure.
114 NON-BLOCKING */
115
116 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
117 if (raidPtr->parityLogDiskQueue.freeDataList)
118 {
119 data = raidPtr->parityLogDiskQueue.freeDataList;
120 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
121 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
122 }
123 else
124 {
125 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
126 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
127 }
128 data->next = NULL;
129 data->prev = NULL;
130 return(data);
131 }
132
133
134 static void FreeParityLogData(RF_ParityLogData_t *data)
135 {
136 RF_ParityLogData_t *nextItem;
137 RF_Raid_t *raidPtr;
138
139 /* Insert a linked list of structs for holding parity log
140 information (data) into the free list (parityLogDiskQueue.freeList).
141 NON-BLOCKING */
142
143 raidPtr = data->common->raidPtr;
144 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
145 while (data)
146 {
147 nextItem = data->next;
148 data->next = raidPtr->parityLogDiskQueue.freeDataList;
149 raidPtr->parityLogDiskQueue.freeDataList = data;
150 data = nextItem;
151 }
152 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
153 }
154
155
156 static void EnqueueParityLogData(
157 RF_ParityLogData_t *data,
158 RF_ParityLogData_t **head,
159 RF_ParityLogData_t **tail)
160 {
161 RF_Raid_t *raidPtr;
162
163 /* Insert an in-core parity log (*data) into the head of
164 a disk queue (*head, *tail).
165 NON-BLOCKING */
166
167 raidPtr = data->common->raidPtr;
168 if (rf_parityLogDebug)
169 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
170 RF_ASSERT(data->prev == NULL);
171 RF_ASSERT(data->next == NULL);
172 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
173 if (*head)
174 {
175 /* insert into head of queue */
176 RF_ASSERT((*head)->prev == NULL);
177 RF_ASSERT((*tail)->next == NULL);
178 data->next = *head;
179 (*head)->prev = data;
180 *head = data;
181 }
182 else
183 {
184 /* insert into empty list */
185 RF_ASSERT(*head == NULL);
186 RF_ASSERT(*tail == NULL);
187 *head = data;
188 *tail = data;
189 }
190 RF_ASSERT((*head)->prev == NULL);
191 RF_ASSERT((*tail)->next == NULL);
192 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
193 }
194
195 static RF_ParityLogData_t *DequeueParityLogData(
196 RF_Raid_t *raidPtr,
197 RF_ParityLogData_t **head,
198 RF_ParityLogData_t **tail,
199 int ignoreLocks)
200 {
201 RF_ParityLogData_t *data;
202
203 /* Remove and return an in-core parity log from the tail of
204 a disk queue (*head, *tail).
205 NON-BLOCKING */
206
207 /* remove from tail, preserving FIFO order */
208 if (!ignoreLocks)
209 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
210 data = *tail;
211 if (data)
212 {
213 if (*head == *tail)
214 {
215 /* removing last item from queue */
216 *head = NULL;
217 *tail = NULL;
218 }
219 else
220 {
221 *tail = (*tail)->prev;
222 (*tail)->next = NULL;
223 RF_ASSERT((*head)->prev == NULL);
224 RF_ASSERT((*tail)->next == NULL);
225 }
226 data->next = NULL;
227 data->prev = NULL;
228 if (rf_parityLogDebug)
229 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
230 }
231 if (*head)
232 {
233 RF_ASSERT((*head)->prev == NULL);
234 RF_ASSERT((*tail)->next == NULL);
235 }
236 if (!ignoreLocks)
237 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
238 return(data);
239 }
240
241
242 static void RequeueParityLogData(
243 RF_ParityLogData_t *data,
244 RF_ParityLogData_t **head,
245 RF_ParityLogData_t **tail)
246 {
247 RF_Raid_t *raidPtr;
248
249 /* Insert an in-core parity log (*data) into the tail of
250 a disk queue (*head, *tail).
251 NON-BLOCKING */
252
253 raidPtr = data->common->raidPtr;
254 RF_ASSERT(data);
255 if (rf_parityLogDebug)
256 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
257 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
258 if (*tail)
259 {
260 /* append to tail of list */
261 data->prev = *tail;
262 data->next = NULL;
263 (*tail)->next = data;
264 *tail = data;
265 }
266 else
267 {
268 /* inserting into an empty list */
269 *head = data;
270 *tail = data;
271 (*head)->prev = NULL;
272 (*tail)->next = NULL;
273 }
274 RF_ASSERT((*head)->prev == NULL);
275 RF_ASSERT((*tail)->next == NULL);
276 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
277 }
278
279 RF_ParityLogData_t *rf_CreateParityLogData(
280 RF_ParityRecordType_t operation,
281 RF_PhysDiskAddr_t *pda,
282 caddr_t bufPtr,
283 RF_Raid_t *raidPtr,
284 int (*wakeFunc)(RF_DagNode_t *node, int status),
285 void *wakeArg,
286 RF_AccTraceEntry_t *tracerec,
287 RF_Etimer_t startTime)
288 {
289 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
290 RF_CommonLogData_t *common;
291 RF_PhysDiskAddr_t *diskAddress;
292 int boundary, offset = 0;
293
294 /* Return an initialized struct of info to be logged.
295 Build one item per physical disk address, one item per region.
296
297 NON-BLOCKING */
298
299 diskAddress = pda;
300 common = AllocParityLogCommonData(raidPtr);
301 RF_ASSERT(common);
302
303 common->operation = operation;
304 common->bufPtr = bufPtr;
305 common->raidPtr = raidPtr;
306 common->wakeFunc = wakeFunc;
307 common->wakeArg = wakeArg;
308 common->tracerec = tracerec;
309 common->startTime = startTime;
310 common->cnt = 0;
311
312 if (rf_parityLogDebug)
313 printf("[entering CreateParityLogData]\n");
314 while (diskAddress)
315 {
316 common->cnt++;
317 data = AllocParityLogData(raidPtr);
318 RF_ASSERT(data);
319 data->common = common;
320 data->next = NULL;
321 data->prev = NULL;
322 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
323 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1))
324 {
325 /* disk address does not cross a region boundary */
326 data->diskAddress = *diskAddress;
327 data->bufOffset = offset;
328 offset = offset + diskAddress->numSector;
329 EnqueueParityLogData(data, &resultHead, &resultTail);
330 /* adjust disk address */
331 diskAddress = diskAddress->next;
332 }
333 else
334 {
335 /* disk address crosses a region boundary */
336 /* find address where region is crossed */
337 boundary = 0;
338 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
339 boundary++;
340
341 /* enter data before the boundary */
342 data->diskAddress = *diskAddress;
343 data->diskAddress.numSector = boundary;
344 data->bufOffset = offset;
345 offset += boundary;
346 EnqueueParityLogData(data, &resultHead, &resultTail);
347 /* adjust disk address */
348 diskAddress->startSector += boundary;
349 diskAddress->numSector -= boundary;
350 }
351 }
352 if (rf_parityLogDebug)
353 printf("[leaving CreateParityLogData]\n");
354 return(resultHead);
355 }
356
357
358 RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(
359 RF_Raid_t *raidPtr,
360 int regionID,
361 RF_ParityLogData_t **head,
362 RF_ParityLogData_t **tail,
363 int ignoreLocks)
364 {
365 RF_ParityLogData_t *w;
366
367 /* Remove and return an in-core parity log from a specified region (regionID).
368 If a matching log is not found, return NULL.
369
370 NON-BLOCKING.
371 */
372
373 /* walk backward through a list, looking for an entry with a matching region ID */
374 if (!ignoreLocks)
375 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
376 w = (*tail);
377 while (w)
378 {
379 if (w->regionID == regionID)
380 {
381 /* remove an element from the list */
382 if (w == *tail)
383 {
384 if (*head == *tail)
385 {
386 /* removing only element in the list */
387 *head = NULL;
388 *tail = NULL;
389 }
390 else
391 {
392 /* removing last item in the list */
393 *tail = (*tail)->prev;
394 (*tail)->next = NULL;
395 RF_ASSERT((*head)->prev == NULL);
396 RF_ASSERT((*tail)->next == NULL);
397 }
398 }
399 else
400 {
401 if (w == *head)
402 {
403 /* removing first item in the list */
404 *head = (*head)->next;
405 (*head)->prev = NULL;
406 RF_ASSERT((*head)->prev == NULL);
407 RF_ASSERT((*tail)->next == NULL);
408 }
409 else
410 {
411 /* removing an item from the middle of the list */
412 w->prev->next = w->next;
413 w->next->prev = w->prev;
414 RF_ASSERT((*head)->prev == NULL);
415 RF_ASSERT((*tail)->next == NULL);
416 }
417 }
418 w->prev = NULL;
419 w->next = NULL;
420 if (rf_parityLogDebug)
421 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",w->regionID,(int)w->diskAddress.raidAddress,(int) w->diskAddress.numSector);
422 return(w);
423 }
424 else
425 w = w->prev;
426 }
427 if (!ignoreLocks)
428 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
429 return(NULL);
430 }
431
432 static RF_ParityLogData_t *DequeueMatchingLogData(
433 RF_Raid_t *raidPtr,
434 RF_ParityLogData_t **head,
435 RF_ParityLogData_t **tail)
436 {
437 RF_ParityLogData_t *logDataList, *logData;
438 int regionID;
439
440 /* Remove and return an in-core parity log from the tail of
441 a disk queue (*head, *tail). Then remove all matching
442 (identical regionIDs) logData and return as a linked list.
443
444 NON-BLOCKING
445 */
446
447 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
448 if (logDataList)
449 {
450 regionID = logDataList->regionID;
451 logData = logDataList;
452 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
453 while (logData->next)
454 {
455 logData = logData->next;
456 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
457 }
458 }
459 return(logDataList);
460 }
461
462
463 static RF_ParityLog_t *AcquireParityLog(
464 RF_ParityLogData_t *logData,
465 int finish)
466 {
467 RF_ParityLog_t *log = NULL;
468 RF_Raid_t *raidPtr;
469
470 /* Grab a log buffer from the pool and return it.
471 If no buffers are available, return NULL.
472 NON-BLOCKING
473 */
474 raidPtr = logData->common->raidPtr;
475 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
476 if (raidPtr->parityLogPool.parityLogs)
477 {
478 log = raidPtr->parityLogPool.parityLogs;
479 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
480 log->regionID = logData->regionID;
481 log->numRecords = 0;
482 log->next = NULL;
483 raidPtr->logsInUse++;
484 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
485 }
486 else
487 {
488 /* no logs available, so place ourselves on the queue of work waiting on log buffers
489 this is done while parityLogPool.mutex is held, to ensure synchronization
490 with ReleaseParityLogs.
491 */
492 if (rf_parityLogDebug)
493 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
494 if (finish)
495 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
496 else
497 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
498 }
499 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
500 return(log);
501 }
502
503 void rf_ReleaseParityLogs(
504 RF_Raid_t *raidPtr,
505 RF_ParityLog_t *firstLog)
506 {
507 RF_ParityLogData_t *logDataList;
508 RF_ParityLog_t *log, *lastLog;
509 int cnt;
510
511 /* Insert a linked list of parity logs (firstLog) to
512 the free list (parityLogPool.parityLogPool)
513
514 NON-BLOCKING.
515 */
516
517 RF_ASSERT(firstLog);
518
519 /* Before returning logs to global free list, service all
520 requests which are blocked on logs. Holding mutexes for parityLogPool and parityLogDiskQueue
521 forces synchronization with AcquireParityLog().
522 */
523 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
524 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
525 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
526 log = firstLog;
527 if (firstLog)
528 firstLog = firstLog->next;
529 log->numRecords = 0;
530 log->next = NULL;
531 while (logDataList && log)
532 {
533 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
534 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
535 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
536 if (rf_parityLogDebug)
537 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
538 if (log == NULL)
539 {
540 log = firstLog;
541 if (firstLog)
542 {
543 firstLog = firstLog->next;
544 log->numRecords = 0;
545 log->next = NULL;
546 }
547 }
548 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
549 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
550 if (log)
551 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
552 }
553 /* return remaining logs to pool */
554 if (log)
555 {
556 log->next = firstLog;
557 firstLog = log;
558 }
559 if (firstLog)
560 {
561 lastLog = firstLog;
562 raidPtr->logsInUse--;
563 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
564 while (lastLog->next)
565 {
566 lastLog = lastLog->next;
567 raidPtr->logsInUse--;
568 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
569 }
570 lastLog->next = raidPtr->parityLogPool.parityLogs;
571 raidPtr->parityLogPool.parityLogs = firstLog;
572 cnt = 0;
573 log = raidPtr->parityLogPool.parityLogs;
574 while (log)
575 {
576 cnt++;
577 log = log->next;
578 }
579 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
580 }
581 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
582 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
583 }
584
585 static void ReintLog(
586 RF_Raid_t *raidPtr,
587 int regionID,
588 RF_ParityLog_t *log)
589 {
590 RF_ASSERT(log);
591
592 /* Insert an in-core parity log (log) into the disk queue of reintegration
593 work. Set the flag (reintInProgress) for the specified region (regionID)
594 to indicate that reintegration is in progress for this region.
595 NON-BLOCKING
596 */
597
598 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
599 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint complete */
600
601 if (rf_parityLogDebug)
602 printf("[requesting reintegration of region %d]\n", log->regionID);
603 /* move record to reintegration queue */
604 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
605 log->next = raidPtr->parityLogDiskQueue.reintQueue;
606 raidPtr->parityLogDiskQueue.reintQueue = log;
607 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
608 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
609 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
610 }
611
612 static void FlushLog(
613 RF_Raid_t *raidPtr,
614 RF_ParityLog_t *log)
615 {
616 /* insert a core log (log) into a list of logs (parityLogDiskQueue.flushQueue)
617 waiting to be written to disk.
618 NON-BLOCKING
619 */
620
621 RF_ASSERT(log);
622 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
623 RF_ASSERT(log->next == NULL);
624 /* move log to flush queue */
625 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
626 log->next = raidPtr->parityLogDiskQueue.flushQueue;
627 raidPtr->parityLogDiskQueue.flushQueue = log;
628 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
629 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
630 }
631
632 static int DumpParityLogToDisk(
633 int finish,
634 RF_ParityLogData_t *logData)
635 {
636 int i, diskCount, regionID = logData->regionID;
637 RF_ParityLog_t *log;
638 RF_Raid_t *raidPtr;
639
640 raidPtr = logData->common->raidPtr;
641
642 /* Move a core log to disk. If the log disk is full, initiate
643 reintegration.
644
645 Return (0) if we can enqueue the dump immediately, otherwise
646 return (1) to indicate we are blocked on reintegration and
647 control of the thread should be relinquished.
648
649 Caller must hold regionInfo[regionID].mutex
650
651 NON-BLOCKING
652 */
653
654 if (rf_parityLogDebug)
655 printf("[dumping parity log to disk, region %d]\n", regionID);
656 log = raidPtr->regionInfo[regionID].coreLog;
657 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
658 RF_ASSERT(log->next == NULL);
659
660 /* if reintegration is in progress, must queue work */
661 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
662 if (raidPtr->regionInfo[regionID].reintInProgress)
663 {
664 /* Can not proceed since this region is currently being reintegrated.
665 We can not block, so queue remaining work and return */
666 if (rf_parityLogDebug)
667 printf("[region %d waiting on reintegration]\n",regionID);
668 /* XXX not sure about the use of finish - shouldn't this always be "Enqueue"? */
669 if (finish)
670 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
671 else
672 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
673 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
674 return(1); /* relenquish control of this thread */
675 }
676 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
677 raidPtr->regionInfo[regionID].coreLog = NULL;
678 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
679 /* IMPORTANT!! this loop bound assumes region disk holds an integral number of core logs */
680 {
681 /* update disk map for this region */
682 diskCount = raidPtr->regionInfo[regionID].diskCount;
683 for (i = 0; i < raidPtr->numSectorsPerLog; i++)
684 {
685 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
686 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
687 }
688 log->diskOffset = diskCount;
689 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
690 FlushLog(raidPtr, log);
691 }
692 else
693 {
694 /* no room for log on disk, send it to disk manager and request reintegration */
695 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
696 ReintLog(raidPtr, regionID, log);
697 }
698 if (rf_parityLogDebug)
699 printf("[finished dumping parity log to disk, region %d]\n", regionID);
700 return(0);
701 }
702
703 int rf_ParityLogAppend(
704 RF_ParityLogData_t *logData,
705 int finish,
706 RF_ParityLog_t **incomingLog,
707 int clearReintFlag)
708 {
709 int regionID, logItem, itemDone;
710 RF_ParityLogData_t *item;
711 int punt, done = RF_FALSE;
712 RF_ParityLog_t *log;
713 RF_Raid_t *raidPtr;
714 RF_Etimer_t timer;
715 int (*wakeFunc)(RF_DagNode_t *node, int status);
716 void *wakeArg;
717
718 /* Add parity to the appropriate log, one sector at a time.
719 This routine is called is called by dag functions ParityLogUpdateFunc
720 and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
721
722 Parity to be logged is contained in a linked-list (logData). When
723 this routine returns, every sector in the list will be in one of
724 three places:
725 1) entered into the parity log
726 2) queued, waiting on reintegration
727 3) queued, waiting on a core log
728
729 Blocked work is passed to the ParityLoggingDiskManager for completion.
730 Later, as conditions which required the block are removed, the work
731 reenters this routine with the "finish" parameter set to "RF_TRUE."
732
733 NON-BLOCKING
734 */
735
736 raidPtr = logData->common->raidPtr;
737 /* lock the region for the first item in logData */
738 RF_ASSERT(logData != NULL);
739 regionID = logData->regionID;
740 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
741 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
742
743 if (clearReintFlag)
744 {
745 /* Enable flushing for this region. Holding both locks provides
746 a synchronization barrier with DumpParityLogToDisk
747 */
748 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
749 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
750 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
751 raidPtr->regionInfo[regionID].diskCount = 0;
752 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
753 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
754 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
755 }
756
757 /* process each item in logData */
758 while (logData)
759 {
760 /* remove an item from logData */
761 item = logData;
762 logData = logData->next;
763 item->next = NULL;
764 item->prev = NULL;
765
766 if (rf_parityLogDebug)
767 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n",item->regionID,(int)item->diskAddress.raidAddress, (int)item->diskAddress.numSector);
768
769 /* see if we moved to a new region */
770 if (regionID != item->regionID)
771 {
772 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
773 regionID = item->regionID;
774 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
775 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
776 }
777
778 punt = RF_FALSE; /* Set to RF_TRUE if work is blocked. This can happen in one of two ways:
779 1) no core log (AcquireParityLog)
780 2) waiting on reintegration (DumpParityLogToDisk)
781 If punt is RF_TRUE, the dataItem was queued, so skip to next item.
782 */
783
784 /* process item, one sector at a time, until all sectors processed or we punt */
785 if (item->diskAddress.numSector > 0)
786 done = RF_FALSE;
787 else
788 RF_ASSERT(0);
789 while (!punt && !done)
790 {
791 /* verify that a core log exists for this region */
792 if (!raidPtr->regionInfo[regionID].coreLog)
793 {
794 /* Attempt to acquire a parity log.
795 If acquisition fails, queue remaining work in data item and move to nextItem.
796 */
797 if (incomingLog)
798 if (*incomingLog)
799 {
800 RF_ASSERT((*incomingLog)->next == NULL);
801 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
802 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
803 *incomingLog = NULL;
804 }
805 else
806 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
807 else
808 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
809 /* Note: AcquireParityLog either returns a log or enqueues currentItem */
810 }
811 if (!raidPtr->regionInfo[regionID].coreLog)
812 punt = RF_TRUE; /* failed to find a core log */
813 else
814 {
815 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
816 /* verify that the log has room for new entries */
817 /* if log is full, dump it to disk and grab a new log */
818 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog)
819 {
820 /* log is full, dump it to disk */
821 if (DumpParityLogToDisk(finish, item))
822 punt = RF_TRUE; /* dump unsuccessful, blocked on reintegration */
823 else
824 {
825 /* dump was successful */
826 if (incomingLog)
827 if (*incomingLog)
828 {
829 RF_ASSERT((*incomingLog)->next == NULL);
830 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
831 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
832 *incomingLog = NULL;
833 }
834 else
835 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
836 else
837 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
838 /* if a core log is not available, must queue work and return */
839 if (!raidPtr->regionInfo[regionID].coreLog)
840 punt = RF_TRUE; /* blocked on log availability */
841 }
842 }
843 }
844 /* if we didn't punt on this item, attempt to add a sector to the core log */
845 if (!punt)
846 {
847 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
848 /* at this point, we have a core log with enough room for a sector */
849 /* copy a sector into the log */
850 log = raidPtr->regionInfo[regionID].coreLog;
851 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
852 logItem = log->numRecords++;
853 log->records[logItem].parityAddr = item->diskAddress;
854 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
855 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
856 log->records[logItem].parityAddr.numSector = 1;
857 log->records[logItem].operation = item->common->operation;
858 bcopy((item->common->bufPtr + (item->bufOffset++ * (1<<item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1<<item->common->raidPtr->logBytesPerSector)), (1<<item->common->raidPtr->logBytesPerSector));
859 item->diskAddress.numSector--;
860 item->diskAddress.startSector++;
861 if (item->diskAddress.numSector == 0)
862 done = RF_TRUE;
863 }
864 }
865
866 if (!punt)
867 {
868 /* Processed this item completely, decrement count of items
869 to be processed.
870 */
871 RF_ASSERT(item->diskAddress.numSector == 0);
872 RF_LOCK_MUTEX(item->common->mutex);
873 item->common->cnt--;
874 if (item->common->cnt == 0)
875 itemDone = RF_TRUE;
876 else
877 itemDone = RF_FALSE;
878 RF_UNLOCK_MUTEX(item->common->mutex);
879 if (itemDone)
880 {
881 /* Finished processing all log data for this IO
882 Return structs to free list and invoke wakeup function.
883 */
884 timer = item->common->startTime; /* grab initial value of timer */
885 RF_ETIMER_STOP(timer);
886 RF_ETIMER_EVAL(timer);
887 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
888 if (rf_parityLogDebug)
889 printf("[waking process for region %d]\n", item->regionID);
890 wakeFunc = item->common->wakeFunc;
891 wakeArg = item->common->wakeArg;
892 FreeParityLogCommonData(item->common);
893 FreeParityLogData(item);
894 (wakeFunc)(wakeArg, 0);
895 }
896 else
897 FreeParityLogData(item);
898 }
899 }
900 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
901 if (rf_parityLogDebug)
902 printf("[exiting ParityLogAppend]\n");
903 return(0);
904 }
905
906
907 void rf_EnableParityLogging(RF_Raid_t *raidPtr)
908 {
909 int regionID;
910
911 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
912 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
913 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
914 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
915 }
916 if (rf_parityLogDebug)
917 printf("[parity logging enabled]\n");
918 }
919
920 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
921