rf_paritylog.c revision 1.6 1 /* $NetBSD: rf_paritylog.c,v 1.6 2001/10/04 15:58:54 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* Code for manipulating in-core parity logs
30 *
31 */
32
33 #include "rf_archs.h"
34
35 #if RF_INCLUDE_PARITYLOGGING > 0
36
37 /*
38 * Append-only log for recording parity "update" and "overwrite" records
39 */
40
41 #include <dev/raidframe/raidframevar.h>
42
43 #include "rf_threadstuff.h"
44 #include "rf_mcpair.h"
45 #include "rf_raid.h"
46 #include "rf_dag.h"
47 #include "rf_dagfuncs.h"
48 #include "rf_desc.h"
49 #include "rf_layout.h"
50 #include "rf_diskqueue.h"
51 #include "rf_etimer.h"
52 #include "rf_paritylog.h"
53 #include "rf_general.h"
54 #include "rf_map.h"
55 #include "rf_paritylogging.h"
56 #include "rf_paritylogDiskMgr.h"
57
58 static RF_CommonLogData_t *
59 AllocParityLogCommonData(RF_Raid_t * raidPtr)
60 {
61 RF_CommonLogData_t *common = NULL;
62 int rc;
63
64 /* Return a struct for holding common parity log information from the
65 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
66 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
67
68 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
69 if (raidPtr->parityLogDiskQueue.freeCommonList) {
70 common = raidPtr->parityLogDiskQueue.freeCommonList;
71 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
72 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
73 } else {
74 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
75 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
76 rc = rf_mutex_init(&common->mutex);
77 if (rc) {
78 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
79 __LINE__, rc);
80 RF_Free(common, sizeof(RF_CommonLogData_t));
81 common = NULL;
82 }
83 }
84 common->next = NULL;
85 return (common);
86 }
87
88 static void
89 FreeParityLogCommonData(RF_CommonLogData_t * common)
90 {
91 RF_Raid_t *raidPtr;
92
93 /* Insert a single struct for holding parity log information (data)
94 * into the free list (rf_parityLogDiskQueue.freeCommonList).
95 * NON-BLOCKING */
96
97 raidPtr = common->raidPtr;
98 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
99 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
100 raidPtr->parityLogDiskQueue.freeCommonList = common;
101 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
102 }
103
104 static RF_ParityLogData_t *
105 AllocParityLogData(RF_Raid_t * raidPtr)
106 {
107 RF_ParityLogData_t *data = NULL;
108
109 /* Return a struct for holding parity log information from the free
110 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
111 * call RF_Malloc to create a new structure. NON-BLOCKING */
112
113 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
114 if (raidPtr->parityLogDiskQueue.freeDataList) {
115 data = raidPtr->parityLogDiskQueue.freeDataList;
116 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
117 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
118 } else {
119 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
120 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
121 }
122 data->next = NULL;
123 data->prev = NULL;
124 return (data);
125 }
126
127
128 static void
129 FreeParityLogData(RF_ParityLogData_t * data)
130 {
131 RF_ParityLogData_t *nextItem;
132 RF_Raid_t *raidPtr;
133
134 /* Insert a linked list of structs for holding parity log information
135 * (data) into the free list (parityLogDiskQueue.freeList).
136 * NON-BLOCKING */
137
138 raidPtr = data->common->raidPtr;
139 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
140 while (data) {
141 nextItem = data->next;
142 data->next = raidPtr->parityLogDiskQueue.freeDataList;
143 raidPtr->parityLogDiskQueue.freeDataList = data;
144 data = nextItem;
145 }
146 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
147 }
148
149
150 static void
151 EnqueueParityLogData(
152 RF_ParityLogData_t * data,
153 RF_ParityLogData_t ** head,
154 RF_ParityLogData_t ** tail)
155 {
156 RF_Raid_t *raidPtr;
157
158 /* Insert an in-core parity log (*data) into the head of a disk queue
159 * (*head, *tail). NON-BLOCKING */
160
161 raidPtr = data->common->raidPtr;
162 if (rf_parityLogDebug)
163 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
164 RF_ASSERT(data->prev == NULL);
165 RF_ASSERT(data->next == NULL);
166 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
167 if (*head) {
168 /* insert into head of queue */
169 RF_ASSERT((*head)->prev == NULL);
170 RF_ASSERT((*tail)->next == NULL);
171 data->next = *head;
172 (*head)->prev = data;
173 *head = data;
174 } else {
175 /* insert into empty list */
176 RF_ASSERT(*head == NULL);
177 RF_ASSERT(*tail == NULL);
178 *head = data;
179 *tail = data;
180 }
181 RF_ASSERT((*head)->prev == NULL);
182 RF_ASSERT((*tail)->next == NULL);
183 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
184 }
185
186 static RF_ParityLogData_t *
187 DequeueParityLogData(
188 RF_Raid_t * raidPtr,
189 RF_ParityLogData_t ** head,
190 RF_ParityLogData_t ** tail,
191 int ignoreLocks)
192 {
193 RF_ParityLogData_t *data;
194
195 /* Remove and return an in-core parity log from the tail of a disk
196 * queue (*head, *tail). NON-BLOCKING */
197
198 /* remove from tail, preserving FIFO order */
199 if (!ignoreLocks)
200 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
201 data = *tail;
202 if (data) {
203 if (*head == *tail) {
204 /* removing last item from queue */
205 *head = NULL;
206 *tail = NULL;
207 } else {
208 *tail = (*tail)->prev;
209 (*tail)->next = NULL;
210 RF_ASSERT((*head)->prev == NULL);
211 RF_ASSERT((*tail)->next == NULL);
212 }
213 data->next = NULL;
214 data->prev = NULL;
215 if (rf_parityLogDebug)
216 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
217 }
218 if (*head) {
219 RF_ASSERT((*head)->prev == NULL);
220 RF_ASSERT((*tail)->next == NULL);
221 }
222 if (!ignoreLocks)
223 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
224 return (data);
225 }
226
227
228 static void
229 RequeueParityLogData(
230 RF_ParityLogData_t * data,
231 RF_ParityLogData_t ** head,
232 RF_ParityLogData_t ** tail)
233 {
234 RF_Raid_t *raidPtr;
235
236 /* Insert an in-core parity log (*data) into the tail of a disk queue
237 * (*head, *tail). NON-BLOCKING */
238
239 raidPtr = data->common->raidPtr;
240 RF_ASSERT(data);
241 if (rf_parityLogDebug)
242 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
243 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
244 if (*tail) {
245 /* append to tail of list */
246 data->prev = *tail;
247 data->next = NULL;
248 (*tail)->next = data;
249 *tail = data;
250 } else {
251 /* inserting into an empty list */
252 *head = data;
253 *tail = data;
254 (*head)->prev = NULL;
255 (*tail)->next = NULL;
256 }
257 RF_ASSERT((*head)->prev == NULL);
258 RF_ASSERT((*tail)->next == NULL);
259 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
260 }
261
262 RF_ParityLogData_t *
263 rf_CreateParityLogData(
264 RF_ParityRecordType_t operation,
265 RF_PhysDiskAddr_t * pda,
266 caddr_t bufPtr,
267 RF_Raid_t * raidPtr,
268 int (*wakeFunc) (RF_DagNode_t * node, int status),
269 void *wakeArg,
270 RF_AccTraceEntry_t * tracerec,
271 RF_Etimer_t startTime)
272 {
273 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
274 RF_CommonLogData_t *common;
275 RF_PhysDiskAddr_t *diskAddress;
276 int boundary, offset = 0;
277
278 /* Return an initialized struct of info to be logged. Build one item
279 * per physical disk address, one item per region.
280 *
281 * NON-BLOCKING */
282
283 diskAddress = pda;
284 common = AllocParityLogCommonData(raidPtr);
285 RF_ASSERT(common);
286
287 common->operation = operation;
288 common->bufPtr = bufPtr;
289 common->raidPtr = raidPtr;
290 common->wakeFunc = wakeFunc;
291 common->wakeArg = wakeArg;
292 common->tracerec = tracerec;
293 common->startTime = startTime;
294 common->cnt = 0;
295
296 if (rf_parityLogDebug)
297 printf("[entering CreateParityLogData]\n");
298 while (diskAddress) {
299 common->cnt++;
300 data = AllocParityLogData(raidPtr);
301 RF_ASSERT(data);
302 data->common = common;
303 data->next = NULL;
304 data->prev = NULL;
305 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
306 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
307 /* disk address does not cross a region boundary */
308 data->diskAddress = *diskAddress;
309 data->bufOffset = offset;
310 offset = offset + diskAddress->numSector;
311 EnqueueParityLogData(data, &resultHead, &resultTail);
312 /* adjust disk address */
313 diskAddress = diskAddress->next;
314 } else {
315 /* disk address crosses a region boundary */
316 /* find address where region is crossed */
317 boundary = 0;
318 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
319 boundary++;
320
321 /* enter data before the boundary */
322 data->diskAddress = *diskAddress;
323 data->diskAddress.numSector = boundary;
324 data->bufOffset = offset;
325 offset += boundary;
326 EnqueueParityLogData(data, &resultHead, &resultTail);
327 /* adjust disk address */
328 diskAddress->startSector += boundary;
329 diskAddress->numSector -= boundary;
330 }
331 }
332 if (rf_parityLogDebug)
333 printf("[leaving CreateParityLogData]\n");
334 return (resultHead);
335 }
336
337
338 RF_ParityLogData_t *
339 rf_SearchAndDequeueParityLogData(
340 RF_Raid_t * raidPtr,
341 int regionID,
342 RF_ParityLogData_t ** head,
343 RF_ParityLogData_t ** tail,
344 int ignoreLocks)
345 {
346 RF_ParityLogData_t *w;
347
348 /* Remove and return an in-core parity log from a specified region
349 * (regionID). If a matching log is not found, return NULL.
350 *
351 * NON-BLOCKING. */
352
353 /* walk backward through a list, looking for an entry with a matching
354 * region ID */
355 if (!ignoreLocks)
356 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
357 w = (*tail);
358 while (w) {
359 if (w->regionID == regionID) {
360 /* remove an element from the list */
361 if (w == *tail) {
362 if (*head == *tail) {
363 /* removing only element in the list */
364 *head = NULL;
365 *tail = NULL;
366 } else {
367 /* removing last item in the list */
368 *tail = (*tail)->prev;
369 (*tail)->next = NULL;
370 RF_ASSERT((*head)->prev == NULL);
371 RF_ASSERT((*tail)->next == NULL);
372 }
373 } else {
374 if (w == *head) {
375 /* removing first item in the list */
376 *head = (*head)->next;
377 (*head)->prev = NULL;
378 RF_ASSERT((*head)->prev == NULL);
379 RF_ASSERT((*tail)->next == NULL);
380 } else {
381 /* removing an item from the middle of
382 * the list */
383 w->prev->next = w->next;
384 w->next->prev = w->prev;
385 RF_ASSERT((*head)->prev == NULL);
386 RF_ASSERT((*tail)->next == NULL);
387 }
388 }
389 w->prev = NULL;
390 w->next = NULL;
391 if (rf_parityLogDebug)
392 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
393 return (w);
394 } else
395 w = w->prev;
396 }
397 if (!ignoreLocks)
398 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
399 return (NULL);
400 }
401
402 static RF_ParityLogData_t *
403 DequeueMatchingLogData(
404 RF_Raid_t * raidPtr,
405 RF_ParityLogData_t ** head,
406 RF_ParityLogData_t ** tail)
407 {
408 RF_ParityLogData_t *logDataList, *logData;
409 int regionID;
410
411 /* Remove and return an in-core parity log from the tail of a disk
412 * queue (*head, *tail). Then remove all matching (identical
413 * regionIDs) logData and return as a linked list.
414 *
415 * NON-BLOCKING */
416
417 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
418 if (logDataList) {
419 regionID = logDataList->regionID;
420 logData = logDataList;
421 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
422 while (logData->next) {
423 logData = logData->next;
424 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
425 }
426 }
427 return (logDataList);
428 }
429
430
431 static RF_ParityLog_t *
432 AcquireParityLog(
433 RF_ParityLogData_t * logData,
434 int finish)
435 {
436 RF_ParityLog_t *log = NULL;
437 RF_Raid_t *raidPtr;
438
439 /* Grab a log buffer from the pool and return it. If no buffers are
440 * available, return NULL. NON-BLOCKING */
441 raidPtr = logData->common->raidPtr;
442 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
443 if (raidPtr->parityLogPool.parityLogs) {
444 log = raidPtr->parityLogPool.parityLogs;
445 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
446 log->regionID = logData->regionID;
447 log->numRecords = 0;
448 log->next = NULL;
449 raidPtr->logsInUse++;
450 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
451 } else {
452 /* no logs available, so place ourselves on the queue of work
453 * waiting on log buffers this is done while
454 * parityLogPool.mutex is held, to ensure synchronization with
455 * ReleaseParityLogs. */
456 if (rf_parityLogDebug)
457 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
458 if (finish)
459 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
460 else
461 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
462 }
463 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
464 return (log);
465 }
466
467 void
468 rf_ReleaseParityLogs(
469 RF_Raid_t * raidPtr,
470 RF_ParityLog_t * firstLog)
471 {
472 RF_ParityLogData_t *logDataList;
473 RF_ParityLog_t *log, *lastLog;
474 int cnt;
475
476 /* Insert a linked list of parity logs (firstLog) to the free list
477 * (parityLogPool.parityLogPool)
478 *
479 * NON-BLOCKING. */
480
481 RF_ASSERT(firstLog);
482
483 /* Before returning logs to global free list, service all requests
484 * which are blocked on logs. Holding mutexes for parityLogPool and
485 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
486 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
487 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
488 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
489 log = firstLog;
490 if (firstLog)
491 firstLog = firstLog->next;
492 log->numRecords = 0;
493 log->next = NULL;
494 while (logDataList && log) {
495 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
496 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
497 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
498 if (rf_parityLogDebug)
499 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
500 if (log == NULL) {
501 log = firstLog;
502 if (firstLog) {
503 firstLog = firstLog->next;
504 log->numRecords = 0;
505 log->next = NULL;
506 }
507 }
508 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
509 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
510 if (log)
511 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
512 }
513 /* return remaining logs to pool */
514 if (log) {
515 log->next = firstLog;
516 firstLog = log;
517 }
518 if (firstLog) {
519 lastLog = firstLog;
520 raidPtr->logsInUse--;
521 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
522 while (lastLog->next) {
523 lastLog = lastLog->next;
524 raidPtr->logsInUse--;
525 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
526 }
527 lastLog->next = raidPtr->parityLogPool.parityLogs;
528 raidPtr->parityLogPool.parityLogs = firstLog;
529 cnt = 0;
530 log = raidPtr->parityLogPool.parityLogs;
531 while (log) {
532 cnt++;
533 log = log->next;
534 }
535 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
536 }
537 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
538 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
539 }
540
541 static void
542 ReintLog(
543 RF_Raid_t * raidPtr,
544 int regionID,
545 RF_ParityLog_t * log)
546 {
547 RF_ASSERT(log);
548
549 /* Insert an in-core parity log (log) into the disk queue of
550 * reintegration work. Set the flag (reintInProgress) for the
551 * specified region (regionID) to indicate that reintegration is in
552 * progress for this region. NON-BLOCKING */
553
554 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
555 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint
556 * complete */
557
558 if (rf_parityLogDebug)
559 printf("[requesting reintegration of region %d]\n", log->regionID);
560 /* move record to reintegration queue */
561 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
562 log->next = raidPtr->parityLogDiskQueue.reintQueue;
563 raidPtr->parityLogDiskQueue.reintQueue = log;
564 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
565 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
566 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
567 }
568
569 static void
570 FlushLog(
571 RF_Raid_t * raidPtr,
572 RF_ParityLog_t * log)
573 {
574 /* insert a core log (log) into a list of logs
575 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
576 * NON-BLOCKING */
577
578 RF_ASSERT(log);
579 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
580 RF_ASSERT(log->next == NULL);
581 /* move log to flush queue */
582 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
583 log->next = raidPtr->parityLogDiskQueue.flushQueue;
584 raidPtr->parityLogDiskQueue.flushQueue = log;
585 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
586 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
587 }
588
589 static int
590 DumpParityLogToDisk(
591 int finish,
592 RF_ParityLogData_t * logData)
593 {
594 int i, diskCount, regionID = logData->regionID;
595 RF_ParityLog_t *log;
596 RF_Raid_t *raidPtr;
597
598 raidPtr = logData->common->raidPtr;
599
600 /* Move a core log to disk. If the log disk is full, initiate
601 * reintegration.
602 *
603 * Return (0) if we can enqueue the dump immediately, otherwise return
604 * (1) to indicate we are blocked on reintegration and control of the
605 * thread should be relinquished.
606 *
607 * Caller must hold regionInfo[regionID].mutex
608 *
609 * NON-BLOCKING */
610
611 if (rf_parityLogDebug)
612 printf("[dumping parity log to disk, region %d]\n", regionID);
613 log = raidPtr->regionInfo[regionID].coreLog;
614 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
615 RF_ASSERT(log->next == NULL);
616
617 /* if reintegration is in progress, must queue work */
618 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
619 if (raidPtr->regionInfo[regionID].reintInProgress) {
620 /* Can not proceed since this region is currently being
621 * reintegrated. We can not block, so queue remaining work and
622 * return */
623 if (rf_parityLogDebug)
624 printf("[region %d waiting on reintegration]\n", regionID);
625 /* XXX not sure about the use of finish - shouldn't this
626 * always be "Enqueue"? */
627 if (finish)
628 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
629 else
630 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
631 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
632 return (1); /* relenquish control of this thread */
633 }
634 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
635 raidPtr->regionInfo[regionID].coreLog = NULL;
636 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
637 /* IMPORTANT!! this loop bound assumes region disk holds an
638 * integral number of core logs */
639 {
640 /* update disk map for this region */
641 diskCount = raidPtr->regionInfo[regionID].diskCount;
642 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
643 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
644 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
645 }
646 log->diskOffset = diskCount;
647 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
648 FlushLog(raidPtr, log);
649 } else {
650 /* no room for log on disk, send it to disk manager and
651 * request reintegration */
652 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
653 ReintLog(raidPtr, regionID, log);
654 }
655 if (rf_parityLogDebug)
656 printf("[finished dumping parity log to disk, region %d]\n", regionID);
657 return (0);
658 }
659
660 int
661 rf_ParityLogAppend(
662 RF_ParityLogData_t * logData,
663 int finish,
664 RF_ParityLog_t ** incomingLog,
665 int clearReintFlag)
666 {
667 int regionID, logItem, itemDone;
668 RF_ParityLogData_t *item;
669 int punt, done = RF_FALSE;
670 RF_ParityLog_t *log;
671 RF_Raid_t *raidPtr;
672 RF_Etimer_t timer;
673 int (*wakeFunc) (RF_DagNode_t * node, int status);
674 void *wakeArg;
675
676 /* Add parity to the appropriate log, one sector at a time. This
677 * routine is called is called by dag functions ParityLogUpdateFunc
678 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
679 *
680 * Parity to be logged is contained in a linked-list (logData). When
681 * this routine returns, every sector in the list will be in one of
682 * three places: 1) entered into the parity log 2) queued, waiting on
683 * reintegration 3) queued, waiting on a core log
684 *
685 * Blocked work is passed to the ParityLoggingDiskManager for completion.
686 * Later, as conditions which required the block are removed, the work
687 * reenters this routine with the "finish" parameter set to "RF_TRUE."
688 *
689 * NON-BLOCKING */
690
691 raidPtr = logData->common->raidPtr;
692 /* lock the region for the first item in logData */
693 RF_ASSERT(logData != NULL);
694 regionID = logData->regionID;
695 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
696 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
697
698 if (clearReintFlag) {
699 /* Enable flushing for this region. Holding both locks
700 * provides a synchronization barrier with DumpParityLogToDisk */
701 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
702 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
703 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
704 raidPtr->regionInfo[regionID].diskCount = 0;
705 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
706 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
707 * enabled */
708 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
709 }
710 /* process each item in logData */
711 while (logData) {
712 /* remove an item from logData */
713 item = logData;
714 logData = logData->next;
715 item->next = NULL;
716 item->prev = NULL;
717
718 if (rf_parityLogDebug)
719 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
720
721 /* see if we moved to a new region */
722 if (regionID != item->regionID) {
723 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
724 regionID = item->regionID;
725 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
726 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
727 }
728 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This
729 * can happen in one of two ways: 1) no core
730 * log (AcquireParityLog) 2) waiting on
731 * reintegration (DumpParityLogToDisk) If punt
732 * is RF_TRUE, the dataItem was queued, so
733 * skip to next item. */
734
735 /* process item, one sector at a time, until all sectors
736 * processed or we punt */
737 if (item->diskAddress.numSector > 0)
738 done = RF_FALSE;
739 else
740 RF_ASSERT(0);
741 while (!punt && !done) {
742 /* verify that a core log exists for this region */
743 if (!raidPtr->regionInfo[regionID].coreLog) {
744 /* Attempt to acquire a parity log. If
745 * acquisition fails, queue remaining work in
746 * data item and move to nextItem. */
747 if (incomingLog)
748 if (*incomingLog) {
749 RF_ASSERT((*incomingLog)->next == NULL);
750 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
751 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
752 *incomingLog = NULL;
753 } else
754 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
755 else
756 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
757 /* Note: AcquireParityLog either returns a log
758 * or enqueues currentItem */
759 }
760 if (!raidPtr->regionInfo[regionID].coreLog)
761 punt = RF_TRUE; /* failed to find a core log */
762 else {
763 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
764 /* verify that the log has room for new
765 * entries */
766 /* if log is full, dump it to disk and grab a
767 * new log */
768 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
769 /* log is full, dump it to disk */
770 if (DumpParityLogToDisk(finish, item))
771 punt = RF_TRUE; /* dump unsuccessful,
772 * blocked on
773 * reintegration */
774 else {
775 /* dump was successful */
776 if (incomingLog)
777 if (*incomingLog) {
778 RF_ASSERT((*incomingLog)->next == NULL);
779 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
780 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
781 *incomingLog = NULL;
782 } else
783 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
784 else
785 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
786 /* if a core log is not
787 * available, must queue work
788 * and return */
789 if (!raidPtr->regionInfo[regionID].coreLog)
790 punt = RF_TRUE; /* blocked on log
791 * availability */
792 }
793 }
794 }
795 /* if we didn't punt on this item, attempt to add a
796 * sector to the core log */
797 if (!punt) {
798 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
799 /* at this point, we have a core log with
800 * enough room for a sector */
801 /* copy a sector into the log */
802 log = raidPtr->regionInfo[regionID].coreLog;
803 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
804 logItem = log->numRecords++;
805 log->records[logItem].parityAddr = item->diskAddress;
806 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
807 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
808 log->records[logItem].parityAddr.numSector = 1;
809 log->records[logItem].operation = item->common->operation;
810 bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
811 item->diskAddress.numSector--;
812 item->diskAddress.startSector++;
813 if (item->diskAddress.numSector == 0)
814 done = RF_TRUE;
815 }
816 }
817
818 if (!punt) {
819 /* Processed this item completely, decrement count of
820 * items to be processed. */
821 RF_ASSERT(item->diskAddress.numSector == 0);
822 RF_LOCK_MUTEX(item->common->mutex);
823 item->common->cnt--;
824 if (item->common->cnt == 0)
825 itemDone = RF_TRUE;
826 else
827 itemDone = RF_FALSE;
828 RF_UNLOCK_MUTEX(item->common->mutex);
829 if (itemDone) {
830 /* Finished processing all log data for this
831 * IO Return structs to free list and invoke
832 * wakeup function. */
833 timer = item->common->startTime; /* grab initial value of
834 * timer */
835 RF_ETIMER_STOP(timer);
836 RF_ETIMER_EVAL(timer);
837 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
838 if (rf_parityLogDebug)
839 printf("[waking process for region %d]\n", item->regionID);
840 wakeFunc = item->common->wakeFunc;
841 wakeArg = item->common->wakeArg;
842 FreeParityLogCommonData(item->common);
843 FreeParityLogData(item);
844 (wakeFunc) (wakeArg, 0);
845 } else
846 FreeParityLogData(item);
847 }
848 }
849 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
850 if (rf_parityLogDebug)
851 printf("[exiting ParityLogAppend]\n");
852 return (0);
853 }
854
855
856 void
857 rf_EnableParityLogging(RF_Raid_t * raidPtr)
858 {
859 int regionID;
860
861 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
862 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
863 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
864 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
865 }
866 if (rf_parityLogDebug)
867 printf("[parity logging enabled]\n");
868 }
869 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
870