rf_paritylog.c revision 1.4.2.1 1 /* $NetBSD: rf_paritylog.c,v 1.4.2.1 2000/11/20 11:42:56 bouyer Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* Code for manipulating in-core parity logs
30 *
31 */
32
33 #include "rf_archs.h"
34
35 #if RF_INCLUDE_PARITYLOGGING > 0
36
37 /*
38 * Append-only log for recording parity "update" and "overwrite" records
39 */
40
41 #include "rf_types.h"
42 #include "rf_threadstuff.h"
43 #include "rf_mcpair.h"
44 #include "rf_raid.h"
45 #include "rf_dag.h"
46 #include "rf_dagfuncs.h"
47 #include "rf_desc.h"
48 #include "rf_layout.h"
49 #include "rf_diskqueue.h"
50 #include "rf_etimer.h"
51 #include "rf_paritylog.h"
52 #include "rf_general.h"
53 #include "rf_map.h"
54 #include "rf_paritylogging.h"
55 #include "rf_paritylogDiskMgr.h"
56
57 static RF_CommonLogData_t *
58 AllocParityLogCommonData(RF_Raid_t * raidPtr)
59 {
60 RF_CommonLogData_t *common = NULL;
61 int rc;
62
63 /* Return a struct for holding common parity log information from the
64 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
65 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
66
67 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
68 if (raidPtr->parityLogDiskQueue.freeCommonList) {
69 common = raidPtr->parityLogDiskQueue.freeCommonList;
70 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
71 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
72 } else {
73 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
74 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
75 rc = rf_mutex_init(&common->mutex);
76 if (rc) {
77 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
78 __LINE__, rc);
79 RF_Free(common, sizeof(RF_CommonLogData_t));
80 common = NULL;
81 }
82 }
83 common->next = NULL;
84 return (common);
85 }
86
87 static void
88 FreeParityLogCommonData(RF_CommonLogData_t * common)
89 {
90 RF_Raid_t *raidPtr;
91
92 /* Insert a single struct for holding parity log information (data)
93 * into the free list (rf_parityLogDiskQueue.freeCommonList).
94 * NON-BLOCKING */
95
96 raidPtr = common->raidPtr;
97 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
98 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
99 raidPtr->parityLogDiskQueue.freeCommonList = common;
100 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
101 }
102
103 static RF_ParityLogData_t *
104 AllocParityLogData(RF_Raid_t * raidPtr)
105 {
106 RF_ParityLogData_t *data = NULL;
107
108 /* Return a struct for holding parity log information from the free
109 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
110 * call RF_Malloc to create a new structure. NON-BLOCKING */
111
112 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
113 if (raidPtr->parityLogDiskQueue.freeDataList) {
114 data = raidPtr->parityLogDiskQueue.freeDataList;
115 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
116 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
117 } else {
118 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
119 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
120 }
121 data->next = NULL;
122 data->prev = NULL;
123 return (data);
124 }
125
126
127 static void
128 FreeParityLogData(RF_ParityLogData_t * data)
129 {
130 RF_ParityLogData_t *nextItem;
131 RF_Raid_t *raidPtr;
132
133 /* Insert a linked list of structs for holding parity log information
134 * (data) into the free list (parityLogDiskQueue.freeList).
135 * NON-BLOCKING */
136
137 raidPtr = data->common->raidPtr;
138 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
139 while (data) {
140 nextItem = data->next;
141 data->next = raidPtr->parityLogDiskQueue.freeDataList;
142 raidPtr->parityLogDiskQueue.freeDataList = data;
143 data = nextItem;
144 }
145 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
146 }
147
148
149 static void
150 EnqueueParityLogData(
151 RF_ParityLogData_t * data,
152 RF_ParityLogData_t ** head,
153 RF_ParityLogData_t ** tail)
154 {
155 RF_Raid_t *raidPtr;
156
157 /* Insert an in-core parity log (*data) into the head of a disk queue
158 * (*head, *tail). NON-BLOCKING */
159
160 raidPtr = data->common->raidPtr;
161 if (rf_parityLogDebug)
162 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
163 RF_ASSERT(data->prev == NULL);
164 RF_ASSERT(data->next == NULL);
165 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
166 if (*head) {
167 /* insert into head of queue */
168 RF_ASSERT((*head)->prev == NULL);
169 RF_ASSERT((*tail)->next == NULL);
170 data->next = *head;
171 (*head)->prev = data;
172 *head = data;
173 } else {
174 /* insert into empty list */
175 RF_ASSERT(*head == NULL);
176 RF_ASSERT(*tail == NULL);
177 *head = data;
178 *tail = data;
179 }
180 RF_ASSERT((*head)->prev == NULL);
181 RF_ASSERT((*tail)->next == NULL);
182 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
183 }
184
185 static RF_ParityLogData_t *
186 DequeueParityLogData(
187 RF_Raid_t * raidPtr,
188 RF_ParityLogData_t ** head,
189 RF_ParityLogData_t ** tail,
190 int ignoreLocks)
191 {
192 RF_ParityLogData_t *data;
193
194 /* Remove and return an in-core parity log from the tail of a disk
195 * queue (*head, *tail). NON-BLOCKING */
196
197 /* remove from tail, preserving FIFO order */
198 if (!ignoreLocks)
199 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
200 data = *tail;
201 if (data) {
202 if (*head == *tail) {
203 /* removing last item from queue */
204 *head = NULL;
205 *tail = NULL;
206 } else {
207 *tail = (*tail)->prev;
208 (*tail)->next = NULL;
209 RF_ASSERT((*head)->prev == NULL);
210 RF_ASSERT((*tail)->next == NULL);
211 }
212 data->next = NULL;
213 data->prev = NULL;
214 if (rf_parityLogDebug)
215 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
216 }
217 if (*head) {
218 RF_ASSERT((*head)->prev == NULL);
219 RF_ASSERT((*tail)->next == NULL);
220 }
221 if (!ignoreLocks)
222 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
223 return (data);
224 }
225
226
227 static void
228 RequeueParityLogData(
229 RF_ParityLogData_t * data,
230 RF_ParityLogData_t ** head,
231 RF_ParityLogData_t ** tail)
232 {
233 RF_Raid_t *raidPtr;
234
235 /* Insert an in-core parity log (*data) into the tail of a disk queue
236 * (*head, *tail). NON-BLOCKING */
237
238 raidPtr = data->common->raidPtr;
239 RF_ASSERT(data);
240 if (rf_parityLogDebug)
241 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
242 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
243 if (*tail) {
244 /* append to tail of list */
245 data->prev = *tail;
246 data->next = NULL;
247 (*tail)->next = data;
248 *tail = data;
249 } else {
250 /* inserting into an empty list */
251 *head = data;
252 *tail = data;
253 (*head)->prev = NULL;
254 (*tail)->next = NULL;
255 }
256 RF_ASSERT((*head)->prev == NULL);
257 RF_ASSERT((*tail)->next == NULL);
258 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
259 }
260
261 RF_ParityLogData_t *
262 rf_CreateParityLogData(
263 RF_ParityRecordType_t operation,
264 RF_PhysDiskAddr_t * pda,
265 caddr_t bufPtr,
266 RF_Raid_t * raidPtr,
267 int (*wakeFunc) (RF_DagNode_t * node, int status),
268 void *wakeArg,
269 RF_AccTraceEntry_t * tracerec,
270 RF_Etimer_t startTime)
271 {
272 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
273 RF_CommonLogData_t *common;
274 RF_PhysDiskAddr_t *diskAddress;
275 int boundary, offset = 0;
276
277 /* Return an initialized struct of info to be logged. Build one item
278 * per physical disk address, one item per region.
279 *
280 * NON-BLOCKING */
281
282 diskAddress = pda;
283 common = AllocParityLogCommonData(raidPtr);
284 RF_ASSERT(common);
285
286 common->operation = operation;
287 common->bufPtr = bufPtr;
288 common->raidPtr = raidPtr;
289 common->wakeFunc = wakeFunc;
290 common->wakeArg = wakeArg;
291 common->tracerec = tracerec;
292 common->startTime = startTime;
293 common->cnt = 0;
294
295 if (rf_parityLogDebug)
296 printf("[entering CreateParityLogData]\n");
297 while (diskAddress) {
298 common->cnt++;
299 data = AllocParityLogData(raidPtr);
300 RF_ASSERT(data);
301 data->common = common;
302 data->next = NULL;
303 data->prev = NULL;
304 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
305 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
306 /* disk address does not cross a region boundary */
307 data->diskAddress = *diskAddress;
308 data->bufOffset = offset;
309 offset = offset + diskAddress->numSector;
310 EnqueueParityLogData(data, &resultHead, &resultTail);
311 /* adjust disk address */
312 diskAddress = diskAddress->next;
313 } else {
314 /* disk address crosses a region boundary */
315 /* find address where region is crossed */
316 boundary = 0;
317 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
318 boundary++;
319
320 /* enter data before the boundary */
321 data->diskAddress = *diskAddress;
322 data->diskAddress.numSector = boundary;
323 data->bufOffset = offset;
324 offset += boundary;
325 EnqueueParityLogData(data, &resultHead, &resultTail);
326 /* adjust disk address */
327 diskAddress->startSector += boundary;
328 diskAddress->numSector -= boundary;
329 }
330 }
331 if (rf_parityLogDebug)
332 printf("[leaving CreateParityLogData]\n");
333 return (resultHead);
334 }
335
336
337 RF_ParityLogData_t *
338 rf_SearchAndDequeueParityLogData(
339 RF_Raid_t * raidPtr,
340 int regionID,
341 RF_ParityLogData_t ** head,
342 RF_ParityLogData_t ** tail,
343 int ignoreLocks)
344 {
345 RF_ParityLogData_t *w;
346
347 /* Remove and return an in-core parity log from a specified region
348 * (regionID). If a matching log is not found, return NULL.
349 *
350 * NON-BLOCKING. */
351
352 /* walk backward through a list, looking for an entry with a matching
353 * region ID */
354 if (!ignoreLocks)
355 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
356 w = (*tail);
357 while (w) {
358 if (w->regionID == regionID) {
359 /* remove an element from the list */
360 if (w == *tail) {
361 if (*head == *tail) {
362 /* removing only element in the list */
363 *head = NULL;
364 *tail = NULL;
365 } else {
366 /* removing last item in the list */
367 *tail = (*tail)->prev;
368 (*tail)->next = NULL;
369 RF_ASSERT((*head)->prev == NULL);
370 RF_ASSERT((*tail)->next == NULL);
371 }
372 } else {
373 if (w == *head) {
374 /* removing first item in the list */
375 *head = (*head)->next;
376 (*head)->prev = NULL;
377 RF_ASSERT((*head)->prev == NULL);
378 RF_ASSERT((*tail)->next == NULL);
379 } else {
380 /* removing an item from the middle of
381 * the list */
382 w->prev->next = w->next;
383 w->next->prev = w->prev;
384 RF_ASSERT((*head)->prev == NULL);
385 RF_ASSERT((*tail)->next == NULL);
386 }
387 }
388 w->prev = NULL;
389 w->next = NULL;
390 if (rf_parityLogDebug)
391 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
392 return (w);
393 } else
394 w = w->prev;
395 }
396 if (!ignoreLocks)
397 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
398 return (NULL);
399 }
400
401 static RF_ParityLogData_t *
402 DequeueMatchingLogData(
403 RF_Raid_t * raidPtr,
404 RF_ParityLogData_t ** head,
405 RF_ParityLogData_t ** tail)
406 {
407 RF_ParityLogData_t *logDataList, *logData;
408 int regionID;
409
410 /* Remove and return an in-core parity log from the tail of a disk
411 * queue (*head, *tail). Then remove all matching (identical
412 * regionIDs) logData and return as a linked list.
413 *
414 * NON-BLOCKING */
415
416 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
417 if (logDataList) {
418 regionID = logDataList->regionID;
419 logData = logDataList;
420 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
421 while (logData->next) {
422 logData = logData->next;
423 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
424 }
425 }
426 return (logDataList);
427 }
428
429
430 static RF_ParityLog_t *
431 AcquireParityLog(
432 RF_ParityLogData_t * logData,
433 int finish)
434 {
435 RF_ParityLog_t *log = NULL;
436 RF_Raid_t *raidPtr;
437
438 /* Grab a log buffer from the pool and return it. If no buffers are
439 * available, return NULL. NON-BLOCKING */
440 raidPtr = logData->common->raidPtr;
441 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
442 if (raidPtr->parityLogPool.parityLogs) {
443 log = raidPtr->parityLogPool.parityLogs;
444 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
445 log->regionID = logData->regionID;
446 log->numRecords = 0;
447 log->next = NULL;
448 raidPtr->logsInUse++;
449 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
450 } else {
451 /* no logs available, so place ourselves on the queue of work
452 * waiting on log buffers this is done while
453 * parityLogPool.mutex is held, to ensure synchronization with
454 * ReleaseParityLogs. */
455 if (rf_parityLogDebug)
456 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
457 if (finish)
458 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
459 else
460 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
461 }
462 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
463 return (log);
464 }
465
466 void
467 rf_ReleaseParityLogs(
468 RF_Raid_t * raidPtr,
469 RF_ParityLog_t * firstLog)
470 {
471 RF_ParityLogData_t *logDataList;
472 RF_ParityLog_t *log, *lastLog;
473 int cnt;
474
475 /* Insert a linked list of parity logs (firstLog) to the free list
476 * (parityLogPool.parityLogPool)
477 *
478 * NON-BLOCKING. */
479
480 RF_ASSERT(firstLog);
481
482 /* Before returning logs to global free list, service all requests
483 * which are blocked on logs. Holding mutexes for parityLogPool and
484 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
485 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
486 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
487 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
488 log = firstLog;
489 if (firstLog)
490 firstLog = firstLog->next;
491 log->numRecords = 0;
492 log->next = NULL;
493 while (logDataList && log) {
494 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
495 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
496 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
497 if (rf_parityLogDebug)
498 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
499 if (log == NULL) {
500 log = firstLog;
501 if (firstLog) {
502 firstLog = firstLog->next;
503 log->numRecords = 0;
504 log->next = NULL;
505 }
506 }
507 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
508 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
509 if (log)
510 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
511 }
512 /* return remaining logs to pool */
513 if (log) {
514 log->next = firstLog;
515 firstLog = log;
516 }
517 if (firstLog) {
518 lastLog = firstLog;
519 raidPtr->logsInUse--;
520 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
521 while (lastLog->next) {
522 lastLog = lastLog->next;
523 raidPtr->logsInUse--;
524 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
525 }
526 lastLog->next = raidPtr->parityLogPool.parityLogs;
527 raidPtr->parityLogPool.parityLogs = firstLog;
528 cnt = 0;
529 log = raidPtr->parityLogPool.parityLogs;
530 while (log) {
531 cnt++;
532 log = log->next;
533 }
534 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
535 }
536 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
537 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
538 }
539
540 static void
541 ReintLog(
542 RF_Raid_t * raidPtr,
543 int regionID,
544 RF_ParityLog_t * log)
545 {
546 RF_ASSERT(log);
547
548 /* Insert an in-core parity log (log) into the disk queue of
549 * reintegration work. Set the flag (reintInProgress) for the
550 * specified region (regionID) to indicate that reintegration is in
551 * progress for this region. NON-BLOCKING */
552
553 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
554 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint
555 * complete */
556
557 if (rf_parityLogDebug)
558 printf("[requesting reintegration of region %d]\n", log->regionID);
559 /* move record to reintegration queue */
560 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
561 log->next = raidPtr->parityLogDiskQueue.reintQueue;
562 raidPtr->parityLogDiskQueue.reintQueue = log;
563 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
564 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
565 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
566 }
567
568 static void
569 FlushLog(
570 RF_Raid_t * raidPtr,
571 RF_ParityLog_t * log)
572 {
573 /* insert a core log (log) into a list of logs
574 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
575 * NON-BLOCKING */
576
577 RF_ASSERT(log);
578 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
579 RF_ASSERT(log->next == NULL);
580 /* move log to flush queue */
581 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
582 log->next = raidPtr->parityLogDiskQueue.flushQueue;
583 raidPtr->parityLogDiskQueue.flushQueue = log;
584 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
585 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
586 }
587
588 static int
589 DumpParityLogToDisk(
590 int finish,
591 RF_ParityLogData_t * logData)
592 {
593 int i, diskCount, regionID = logData->regionID;
594 RF_ParityLog_t *log;
595 RF_Raid_t *raidPtr;
596
597 raidPtr = logData->common->raidPtr;
598
599 /* Move a core log to disk. If the log disk is full, initiate
600 * reintegration.
601 *
602 * Return (0) if we can enqueue the dump immediately, otherwise return
603 * (1) to indicate we are blocked on reintegration and control of the
604 * thread should be relinquished.
605 *
606 * Caller must hold regionInfo[regionID].mutex
607 *
608 * NON-BLOCKING */
609
610 if (rf_parityLogDebug)
611 printf("[dumping parity log to disk, region %d]\n", regionID);
612 log = raidPtr->regionInfo[regionID].coreLog;
613 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
614 RF_ASSERT(log->next == NULL);
615
616 /* if reintegration is in progress, must queue work */
617 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
618 if (raidPtr->regionInfo[regionID].reintInProgress) {
619 /* Can not proceed since this region is currently being
620 * reintegrated. We can not block, so queue remaining work and
621 * return */
622 if (rf_parityLogDebug)
623 printf("[region %d waiting on reintegration]\n", regionID);
624 /* XXX not sure about the use of finish - shouldn't this
625 * always be "Enqueue"? */
626 if (finish)
627 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
628 else
629 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
630 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
631 return (1); /* relenquish control of this thread */
632 }
633 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
634 raidPtr->regionInfo[regionID].coreLog = NULL;
635 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
636 /* IMPORTANT!! this loop bound assumes region disk holds an
637 * integral number of core logs */
638 {
639 /* update disk map for this region */
640 diskCount = raidPtr->regionInfo[regionID].diskCount;
641 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
642 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
643 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
644 }
645 log->diskOffset = diskCount;
646 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
647 FlushLog(raidPtr, log);
648 } else {
649 /* no room for log on disk, send it to disk manager and
650 * request reintegration */
651 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
652 ReintLog(raidPtr, regionID, log);
653 }
654 if (rf_parityLogDebug)
655 printf("[finished dumping parity log to disk, region %d]\n", regionID);
656 return (0);
657 }
658
659 int
660 rf_ParityLogAppend(
661 RF_ParityLogData_t * logData,
662 int finish,
663 RF_ParityLog_t ** incomingLog,
664 int clearReintFlag)
665 {
666 int regionID, logItem, itemDone;
667 RF_ParityLogData_t *item;
668 int punt, done = RF_FALSE;
669 RF_ParityLog_t *log;
670 RF_Raid_t *raidPtr;
671 RF_Etimer_t timer;
672 int (*wakeFunc) (RF_DagNode_t * node, int status);
673 void *wakeArg;
674
675 /* Add parity to the appropriate log, one sector at a time. This
676 * routine is called is called by dag functions ParityLogUpdateFunc
677 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
678 *
679 * Parity to be logged is contained in a linked-list (logData). When
680 * this routine returns, every sector in the list will be in one of
681 * three places: 1) entered into the parity log 2) queued, waiting on
682 * reintegration 3) queued, waiting on a core log
683 *
684 * Blocked work is passed to the ParityLoggingDiskManager for completion.
685 * Later, as conditions which required the block are removed, the work
686 * reenters this routine with the "finish" parameter set to "RF_TRUE."
687 *
688 * NON-BLOCKING */
689
690 raidPtr = logData->common->raidPtr;
691 /* lock the region for the first item in logData */
692 RF_ASSERT(logData != NULL);
693 regionID = logData->regionID;
694 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
695 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
696
697 if (clearReintFlag) {
698 /* Enable flushing for this region. Holding both locks
699 * provides a synchronization barrier with DumpParityLogToDisk */
700 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
701 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
702 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
703 raidPtr->regionInfo[regionID].diskCount = 0;
704 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
705 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
706 * enabled */
707 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
708 }
709 /* process each item in logData */
710 while (logData) {
711 /* remove an item from logData */
712 item = logData;
713 logData = logData->next;
714 item->next = NULL;
715 item->prev = NULL;
716
717 if (rf_parityLogDebug)
718 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
719
720 /* see if we moved to a new region */
721 if (regionID != item->regionID) {
722 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
723 regionID = item->regionID;
724 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
725 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
726 }
727 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This
728 * can happen in one of two ways: 1) no core
729 * log (AcquireParityLog) 2) waiting on
730 * reintegration (DumpParityLogToDisk) If punt
731 * is RF_TRUE, the dataItem was queued, so
732 * skip to next item. */
733
734 /* process item, one sector at a time, until all sectors
735 * processed or we punt */
736 if (item->diskAddress.numSector > 0)
737 done = RF_FALSE;
738 else
739 RF_ASSERT(0);
740 while (!punt && !done) {
741 /* verify that a core log exists for this region */
742 if (!raidPtr->regionInfo[regionID].coreLog) {
743 /* Attempt to acquire a parity log. If
744 * acquisition fails, queue remaining work in
745 * data item and move to nextItem. */
746 if (incomingLog)
747 if (*incomingLog) {
748 RF_ASSERT((*incomingLog)->next == NULL);
749 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
750 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
751 *incomingLog = NULL;
752 } else
753 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
754 else
755 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
756 /* Note: AcquireParityLog either returns a log
757 * or enqueues currentItem */
758 }
759 if (!raidPtr->regionInfo[regionID].coreLog)
760 punt = RF_TRUE; /* failed to find a core log */
761 else {
762 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
763 /* verify that the log has room for new
764 * entries */
765 /* if log is full, dump it to disk and grab a
766 * new log */
767 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
768 /* log is full, dump it to disk */
769 if (DumpParityLogToDisk(finish, item))
770 punt = RF_TRUE; /* dump unsuccessful,
771 * blocked on
772 * reintegration */
773 else {
774 /* dump was successful */
775 if (incomingLog)
776 if (*incomingLog) {
777 RF_ASSERT((*incomingLog)->next == NULL);
778 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
779 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
780 *incomingLog = NULL;
781 } else
782 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
783 else
784 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
785 /* if a core log is not
786 * available, must queue work
787 * and return */
788 if (!raidPtr->regionInfo[regionID].coreLog)
789 punt = RF_TRUE; /* blocked on log
790 * availability */
791 }
792 }
793 }
794 /* if we didn't punt on this item, attempt to add a
795 * sector to the core log */
796 if (!punt) {
797 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
798 /* at this point, we have a core log with
799 * enough room for a sector */
800 /* copy a sector into the log */
801 log = raidPtr->regionInfo[regionID].coreLog;
802 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
803 logItem = log->numRecords++;
804 log->records[logItem].parityAddr = item->diskAddress;
805 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
806 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
807 log->records[logItem].parityAddr.numSector = 1;
808 log->records[logItem].operation = item->common->operation;
809 bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
810 item->diskAddress.numSector--;
811 item->diskAddress.startSector++;
812 if (item->diskAddress.numSector == 0)
813 done = RF_TRUE;
814 }
815 }
816
817 if (!punt) {
818 /* Processed this item completely, decrement count of
819 * items to be processed. */
820 RF_ASSERT(item->diskAddress.numSector == 0);
821 RF_LOCK_MUTEX(item->common->mutex);
822 item->common->cnt--;
823 if (item->common->cnt == 0)
824 itemDone = RF_TRUE;
825 else
826 itemDone = RF_FALSE;
827 RF_UNLOCK_MUTEX(item->common->mutex);
828 if (itemDone) {
829 /* Finished processing all log data for this
830 * IO Return structs to free list and invoke
831 * wakeup function. */
832 timer = item->common->startTime; /* grab initial value of
833 * timer */
834 RF_ETIMER_STOP(timer);
835 RF_ETIMER_EVAL(timer);
836 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
837 if (rf_parityLogDebug)
838 printf("[waking process for region %d]\n", item->regionID);
839 wakeFunc = item->common->wakeFunc;
840 wakeArg = item->common->wakeArg;
841 FreeParityLogCommonData(item->common);
842 FreeParityLogData(item);
843 (wakeFunc) (wakeArg, 0);
844 } else
845 FreeParityLogData(item);
846 }
847 }
848 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
849 if (rf_parityLogDebug)
850 printf("[exiting ParityLogAppend]\n");
851 return (0);
852 }
853
854
855 void
856 rf_EnableParityLogging(RF_Raid_t * raidPtr)
857 {
858 int regionID;
859
860 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
861 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
862 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
863 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
864 }
865 if (rf_parityLogDebug)
866 printf("[parity logging enabled]\n");
867 }
868 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
869