rf_paritylog.c revision 1.7 1 /* $NetBSD: rf_paritylog.c,v 1.7 2001/11/13 07:11:15 lukem Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* Code for manipulating in-core parity logs
30 *
31 */
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.7 2001/11/13 07:11:15 lukem Exp $");
35
36 #include "rf_archs.h"
37
38 #if RF_INCLUDE_PARITYLOGGING > 0
39
40 /*
41 * Append-only log for recording parity "update" and "overwrite" records
42 */
43
44 #include <dev/raidframe/raidframevar.h>
45
46 #include "rf_threadstuff.h"
47 #include "rf_mcpair.h"
48 #include "rf_raid.h"
49 #include "rf_dag.h"
50 #include "rf_dagfuncs.h"
51 #include "rf_desc.h"
52 #include "rf_layout.h"
53 #include "rf_diskqueue.h"
54 #include "rf_etimer.h"
55 #include "rf_paritylog.h"
56 #include "rf_general.h"
57 #include "rf_map.h"
58 #include "rf_paritylogging.h"
59 #include "rf_paritylogDiskMgr.h"
60
61 static RF_CommonLogData_t *
62 AllocParityLogCommonData(RF_Raid_t * raidPtr)
63 {
64 RF_CommonLogData_t *common = NULL;
65 int rc;
66
67 /* Return a struct for holding common parity log information from the
68 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
69 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
70
71 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
72 if (raidPtr->parityLogDiskQueue.freeCommonList) {
73 common = raidPtr->parityLogDiskQueue.freeCommonList;
74 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
75 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
76 } else {
77 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
78 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
79 rc = rf_mutex_init(&common->mutex);
80 if (rc) {
81 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
82 __LINE__, rc);
83 RF_Free(common, sizeof(RF_CommonLogData_t));
84 common = NULL;
85 }
86 }
87 common->next = NULL;
88 return (common);
89 }
90
91 static void
92 FreeParityLogCommonData(RF_CommonLogData_t * common)
93 {
94 RF_Raid_t *raidPtr;
95
96 /* Insert a single struct for holding parity log information (data)
97 * into the free list (rf_parityLogDiskQueue.freeCommonList).
98 * NON-BLOCKING */
99
100 raidPtr = common->raidPtr;
101 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
102 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
103 raidPtr->parityLogDiskQueue.freeCommonList = common;
104 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
105 }
106
107 static RF_ParityLogData_t *
108 AllocParityLogData(RF_Raid_t * raidPtr)
109 {
110 RF_ParityLogData_t *data = NULL;
111
112 /* Return a struct for holding parity log information from the free
113 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
114 * call RF_Malloc to create a new structure. NON-BLOCKING */
115
116 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
117 if (raidPtr->parityLogDiskQueue.freeDataList) {
118 data = raidPtr->parityLogDiskQueue.freeDataList;
119 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
120 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
121 } else {
122 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
123 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
124 }
125 data->next = NULL;
126 data->prev = NULL;
127 return (data);
128 }
129
130
131 static void
132 FreeParityLogData(RF_ParityLogData_t * data)
133 {
134 RF_ParityLogData_t *nextItem;
135 RF_Raid_t *raidPtr;
136
137 /* Insert a linked list of structs for holding parity log information
138 * (data) into the free list (parityLogDiskQueue.freeList).
139 * NON-BLOCKING */
140
141 raidPtr = data->common->raidPtr;
142 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
143 while (data) {
144 nextItem = data->next;
145 data->next = raidPtr->parityLogDiskQueue.freeDataList;
146 raidPtr->parityLogDiskQueue.freeDataList = data;
147 data = nextItem;
148 }
149 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
150 }
151
152
153 static void
154 EnqueueParityLogData(
155 RF_ParityLogData_t * data,
156 RF_ParityLogData_t ** head,
157 RF_ParityLogData_t ** tail)
158 {
159 RF_Raid_t *raidPtr;
160
161 /* Insert an in-core parity log (*data) into the head of a disk queue
162 * (*head, *tail). NON-BLOCKING */
163
164 raidPtr = data->common->raidPtr;
165 if (rf_parityLogDebug)
166 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
167 RF_ASSERT(data->prev == NULL);
168 RF_ASSERT(data->next == NULL);
169 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
170 if (*head) {
171 /* insert into head of queue */
172 RF_ASSERT((*head)->prev == NULL);
173 RF_ASSERT((*tail)->next == NULL);
174 data->next = *head;
175 (*head)->prev = data;
176 *head = data;
177 } else {
178 /* insert into empty list */
179 RF_ASSERT(*head == NULL);
180 RF_ASSERT(*tail == NULL);
181 *head = data;
182 *tail = data;
183 }
184 RF_ASSERT((*head)->prev == NULL);
185 RF_ASSERT((*tail)->next == NULL);
186 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
187 }
188
189 static RF_ParityLogData_t *
190 DequeueParityLogData(
191 RF_Raid_t * raidPtr,
192 RF_ParityLogData_t ** head,
193 RF_ParityLogData_t ** tail,
194 int ignoreLocks)
195 {
196 RF_ParityLogData_t *data;
197
198 /* Remove and return an in-core parity log from the tail of a disk
199 * queue (*head, *tail). NON-BLOCKING */
200
201 /* remove from tail, preserving FIFO order */
202 if (!ignoreLocks)
203 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
204 data = *tail;
205 if (data) {
206 if (*head == *tail) {
207 /* removing last item from queue */
208 *head = NULL;
209 *tail = NULL;
210 } else {
211 *tail = (*tail)->prev;
212 (*tail)->next = NULL;
213 RF_ASSERT((*head)->prev == NULL);
214 RF_ASSERT((*tail)->next == NULL);
215 }
216 data->next = NULL;
217 data->prev = NULL;
218 if (rf_parityLogDebug)
219 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
220 }
221 if (*head) {
222 RF_ASSERT((*head)->prev == NULL);
223 RF_ASSERT((*tail)->next == NULL);
224 }
225 if (!ignoreLocks)
226 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
227 return (data);
228 }
229
230
231 static void
232 RequeueParityLogData(
233 RF_ParityLogData_t * data,
234 RF_ParityLogData_t ** head,
235 RF_ParityLogData_t ** tail)
236 {
237 RF_Raid_t *raidPtr;
238
239 /* Insert an in-core parity log (*data) into the tail of a disk queue
240 * (*head, *tail). NON-BLOCKING */
241
242 raidPtr = data->common->raidPtr;
243 RF_ASSERT(data);
244 if (rf_parityLogDebug)
245 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
246 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
247 if (*tail) {
248 /* append to tail of list */
249 data->prev = *tail;
250 data->next = NULL;
251 (*tail)->next = data;
252 *tail = data;
253 } else {
254 /* inserting into an empty list */
255 *head = data;
256 *tail = data;
257 (*head)->prev = NULL;
258 (*tail)->next = NULL;
259 }
260 RF_ASSERT((*head)->prev == NULL);
261 RF_ASSERT((*tail)->next == NULL);
262 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
263 }
264
265 RF_ParityLogData_t *
266 rf_CreateParityLogData(
267 RF_ParityRecordType_t operation,
268 RF_PhysDiskAddr_t * pda,
269 caddr_t bufPtr,
270 RF_Raid_t * raidPtr,
271 int (*wakeFunc) (RF_DagNode_t * node, int status),
272 void *wakeArg,
273 RF_AccTraceEntry_t * tracerec,
274 RF_Etimer_t startTime)
275 {
276 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
277 RF_CommonLogData_t *common;
278 RF_PhysDiskAddr_t *diskAddress;
279 int boundary, offset = 0;
280
281 /* Return an initialized struct of info to be logged. Build one item
282 * per physical disk address, one item per region.
283 *
284 * NON-BLOCKING */
285
286 diskAddress = pda;
287 common = AllocParityLogCommonData(raidPtr);
288 RF_ASSERT(common);
289
290 common->operation = operation;
291 common->bufPtr = bufPtr;
292 common->raidPtr = raidPtr;
293 common->wakeFunc = wakeFunc;
294 common->wakeArg = wakeArg;
295 common->tracerec = tracerec;
296 common->startTime = startTime;
297 common->cnt = 0;
298
299 if (rf_parityLogDebug)
300 printf("[entering CreateParityLogData]\n");
301 while (diskAddress) {
302 common->cnt++;
303 data = AllocParityLogData(raidPtr);
304 RF_ASSERT(data);
305 data->common = common;
306 data->next = NULL;
307 data->prev = NULL;
308 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
309 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
310 /* disk address does not cross a region boundary */
311 data->diskAddress = *diskAddress;
312 data->bufOffset = offset;
313 offset = offset + diskAddress->numSector;
314 EnqueueParityLogData(data, &resultHead, &resultTail);
315 /* adjust disk address */
316 diskAddress = diskAddress->next;
317 } else {
318 /* disk address crosses a region boundary */
319 /* find address where region is crossed */
320 boundary = 0;
321 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
322 boundary++;
323
324 /* enter data before the boundary */
325 data->diskAddress = *diskAddress;
326 data->diskAddress.numSector = boundary;
327 data->bufOffset = offset;
328 offset += boundary;
329 EnqueueParityLogData(data, &resultHead, &resultTail);
330 /* adjust disk address */
331 diskAddress->startSector += boundary;
332 diskAddress->numSector -= boundary;
333 }
334 }
335 if (rf_parityLogDebug)
336 printf("[leaving CreateParityLogData]\n");
337 return (resultHead);
338 }
339
340
341 RF_ParityLogData_t *
342 rf_SearchAndDequeueParityLogData(
343 RF_Raid_t * raidPtr,
344 int regionID,
345 RF_ParityLogData_t ** head,
346 RF_ParityLogData_t ** tail,
347 int ignoreLocks)
348 {
349 RF_ParityLogData_t *w;
350
351 /* Remove and return an in-core parity log from a specified region
352 * (regionID). If a matching log is not found, return NULL.
353 *
354 * NON-BLOCKING. */
355
356 /* walk backward through a list, looking for an entry with a matching
357 * region ID */
358 if (!ignoreLocks)
359 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
360 w = (*tail);
361 while (w) {
362 if (w->regionID == regionID) {
363 /* remove an element from the list */
364 if (w == *tail) {
365 if (*head == *tail) {
366 /* removing only element in the list */
367 *head = NULL;
368 *tail = NULL;
369 } else {
370 /* removing last item in the list */
371 *tail = (*tail)->prev;
372 (*tail)->next = NULL;
373 RF_ASSERT((*head)->prev == NULL);
374 RF_ASSERT((*tail)->next == NULL);
375 }
376 } else {
377 if (w == *head) {
378 /* removing first item in the list */
379 *head = (*head)->next;
380 (*head)->prev = NULL;
381 RF_ASSERT((*head)->prev == NULL);
382 RF_ASSERT((*tail)->next == NULL);
383 } else {
384 /* removing an item from the middle of
385 * the list */
386 w->prev->next = w->next;
387 w->next->prev = w->prev;
388 RF_ASSERT((*head)->prev == NULL);
389 RF_ASSERT((*tail)->next == NULL);
390 }
391 }
392 w->prev = NULL;
393 w->next = NULL;
394 if (rf_parityLogDebug)
395 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
396 return (w);
397 } else
398 w = w->prev;
399 }
400 if (!ignoreLocks)
401 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
402 return (NULL);
403 }
404
405 static RF_ParityLogData_t *
406 DequeueMatchingLogData(
407 RF_Raid_t * raidPtr,
408 RF_ParityLogData_t ** head,
409 RF_ParityLogData_t ** tail)
410 {
411 RF_ParityLogData_t *logDataList, *logData;
412 int regionID;
413
414 /* Remove and return an in-core parity log from the tail of a disk
415 * queue (*head, *tail). Then remove all matching (identical
416 * regionIDs) logData and return as a linked list.
417 *
418 * NON-BLOCKING */
419
420 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
421 if (logDataList) {
422 regionID = logDataList->regionID;
423 logData = logDataList;
424 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
425 while (logData->next) {
426 logData = logData->next;
427 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
428 }
429 }
430 return (logDataList);
431 }
432
433
434 static RF_ParityLog_t *
435 AcquireParityLog(
436 RF_ParityLogData_t * logData,
437 int finish)
438 {
439 RF_ParityLog_t *log = NULL;
440 RF_Raid_t *raidPtr;
441
442 /* Grab a log buffer from the pool and return it. If no buffers are
443 * available, return NULL. NON-BLOCKING */
444 raidPtr = logData->common->raidPtr;
445 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
446 if (raidPtr->parityLogPool.parityLogs) {
447 log = raidPtr->parityLogPool.parityLogs;
448 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
449 log->regionID = logData->regionID;
450 log->numRecords = 0;
451 log->next = NULL;
452 raidPtr->logsInUse++;
453 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
454 } else {
455 /* no logs available, so place ourselves on the queue of work
456 * waiting on log buffers this is done while
457 * parityLogPool.mutex is held, to ensure synchronization with
458 * ReleaseParityLogs. */
459 if (rf_parityLogDebug)
460 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
461 if (finish)
462 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
463 else
464 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
465 }
466 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
467 return (log);
468 }
469
470 void
471 rf_ReleaseParityLogs(
472 RF_Raid_t * raidPtr,
473 RF_ParityLog_t * firstLog)
474 {
475 RF_ParityLogData_t *logDataList;
476 RF_ParityLog_t *log, *lastLog;
477 int cnt;
478
479 /* Insert a linked list of parity logs (firstLog) to the free list
480 * (parityLogPool.parityLogPool)
481 *
482 * NON-BLOCKING. */
483
484 RF_ASSERT(firstLog);
485
486 /* Before returning logs to global free list, service all requests
487 * which are blocked on logs. Holding mutexes for parityLogPool and
488 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
489 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
490 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
491 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
492 log = firstLog;
493 if (firstLog)
494 firstLog = firstLog->next;
495 log->numRecords = 0;
496 log->next = NULL;
497 while (logDataList && log) {
498 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
499 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
500 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
501 if (rf_parityLogDebug)
502 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
503 if (log == NULL) {
504 log = firstLog;
505 if (firstLog) {
506 firstLog = firstLog->next;
507 log->numRecords = 0;
508 log->next = NULL;
509 }
510 }
511 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
512 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
513 if (log)
514 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
515 }
516 /* return remaining logs to pool */
517 if (log) {
518 log->next = firstLog;
519 firstLog = log;
520 }
521 if (firstLog) {
522 lastLog = firstLog;
523 raidPtr->logsInUse--;
524 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
525 while (lastLog->next) {
526 lastLog = lastLog->next;
527 raidPtr->logsInUse--;
528 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
529 }
530 lastLog->next = raidPtr->parityLogPool.parityLogs;
531 raidPtr->parityLogPool.parityLogs = firstLog;
532 cnt = 0;
533 log = raidPtr->parityLogPool.parityLogs;
534 while (log) {
535 cnt++;
536 log = log->next;
537 }
538 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
539 }
540 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
541 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
542 }
543
544 static void
545 ReintLog(
546 RF_Raid_t * raidPtr,
547 int regionID,
548 RF_ParityLog_t * log)
549 {
550 RF_ASSERT(log);
551
552 /* Insert an in-core parity log (log) into the disk queue of
553 * reintegration work. Set the flag (reintInProgress) for the
554 * specified region (regionID) to indicate that reintegration is in
555 * progress for this region. NON-BLOCKING */
556
557 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
558 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint
559 * complete */
560
561 if (rf_parityLogDebug)
562 printf("[requesting reintegration of region %d]\n", log->regionID);
563 /* move record to reintegration queue */
564 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
565 log->next = raidPtr->parityLogDiskQueue.reintQueue;
566 raidPtr->parityLogDiskQueue.reintQueue = log;
567 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
568 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
569 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
570 }
571
572 static void
573 FlushLog(
574 RF_Raid_t * raidPtr,
575 RF_ParityLog_t * log)
576 {
577 /* insert a core log (log) into a list of logs
578 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
579 * NON-BLOCKING */
580
581 RF_ASSERT(log);
582 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
583 RF_ASSERT(log->next == NULL);
584 /* move log to flush queue */
585 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
586 log->next = raidPtr->parityLogDiskQueue.flushQueue;
587 raidPtr->parityLogDiskQueue.flushQueue = log;
588 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
589 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
590 }
591
592 static int
593 DumpParityLogToDisk(
594 int finish,
595 RF_ParityLogData_t * logData)
596 {
597 int i, diskCount, regionID = logData->regionID;
598 RF_ParityLog_t *log;
599 RF_Raid_t *raidPtr;
600
601 raidPtr = logData->common->raidPtr;
602
603 /* Move a core log to disk. If the log disk is full, initiate
604 * reintegration.
605 *
606 * Return (0) if we can enqueue the dump immediately, otherwise return
607 * (1) to indicate we are blocked on reintegration and control of the
608 * thread should be relinquished.
609 *
610 * Caller must hold regionInfo[regionID].mutex
611 *
612 * NON-BLOCKING */
613
614 if (rf_parityLogDebug)
615 printf("[dumping parity log to disk, region %d]\n", regionID);
616 log = raidPtr->regionInfo[regionID].coreLog;
617 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
618 RF_ASSERT(log->next == NULL);
619
620 /* if reintegration is in progress, must queue work */
621 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
622 if (raidPtr->regionInfo[regionID].reintInProgress) {
623 /* Can not proceed since this region is currently being
624 * reintegrated. We can not block, so queue remaining work and
625 * return */
626 if (rf_parityLogDebug)
627 printf("[region %d waiting on reintegration]\n", regionID);
628 /* XXX not sure about the use of finish - shouldn't this
629 * always be "Enqueue"? */
630 if (finish)
631 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
632 else
633 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
634 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
635 return (1); /* relenquish control of this thread */
636 }
637 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
638 raidPtr->regionInfo[regionID].coreLog = NULL;
639 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
640 /* IMPORTANT!! this loop bound assumes region disk holds an
641 * integral number of core logs */
642 {
643 /* update disk map for this region */
644 diskCount = raidPtr->regionInfo[regionID].diskCount;
645 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
646 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
647 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
648 }
649 log->diskOffset = diskCount;
650 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
651 FlushLog(raidPtr, log);
652 } else {
653 /* no room for log on disk, send it to disk manager and
654 * request reintegration */
655 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
656 ReintLog(raidPtr, regionID, log);
657 }
658 if (rf_parityLogDebug)
659 printf("[finished dumping parity log to disk, region %d]\n", regionID);
660 return (0);
661 }
662
663 int
664 rf_ParityLogAppend(
665 RF_ParityLogData_t * logData,
666 int finish,
667 RF_ParityLog_t ** incomingLog,
668 int clearReintFlag)
669 {
670 int regionID, logItem, itemDone;
671 RF_ParityLogData_t *item;
672 int punt, done = RF_FALSE;
673 RF_ParityLog_t *log;
674 RF_Raid_t *raidPtr;
675 RF_Etimer_t timer;
676 int (*wakeFunc) (RF_DagNode_t * node, int status);
677 void *wakeArg;
678
679 /* Add parity to the appropriate log, one sector at a time. This
680 * routine is called is called by dag functions ParityLogUpdateFunc
681 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
682 *
683 * Parity to be logged is contained in a linked-list (logData). When
684 * this routine returns, every sector in the list will be in one of
685 * three places: 1) entered into the parity log 2) queued, waiting on
686 * reintegration 3) queued, waiting on a core log
687 *
688 * Blocked work is passed to the ParityLoggingDiskManager for completion.
689 * Later, as conditions which required the block are removed, the work
690 * reenters this routine with the "finish" parameter set to "RF_TRUE."
691 *
692 * NON-BLOCKING */
693
694 raidPtr = logData->common->raidPtr;
695 /* lock the region for the first item in logData */
696 RF_ASSERT(logData != NULL);
697 regionID = logData->regionID;
698 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
699 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
700
701 if (clearReintFlag) {
702 /* Enable flushing for this region. Holding both locks
703 * provides a synchronization barrier with DumpParityLogToDisk */
704 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
705 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
706 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
707 raidPtr->regionInfo[regionID].diskCount = 0;
708 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
709 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
710 * enabled */
711 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
712 }
713 /* process each item in logData */
714 while (logData) {
715 /* remove an item from logData */
716 item = logData;
717 logData = logData->next;
718 item->next = NULL;
719 item->prev = NULL;
720
721 if (rf_parityLogDebug)
722 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
723
724 /* see if we moved to a new region */
725 if (regionID != item->regionID) {
726 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
727 regionID = item->regionID;
728 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
729 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
730 }
731 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This
732 * can happen in one of two ways: 1) no core
733 * log (AcquireParityLog) 2) waiting on
734 * reintegration (DumpParityLogToDisk) If punt
735 * is RF_TRUE, the dataItem was queued, so
736 * skip to next item. */
737
738 /* process item, one sector at a time, until all sectors
739 * processed or we punt */
740 if (item->diskAddress.numSector > 0)
741 done = RF_FALSE;
742 else
743 RF_ASSERT(0);
744 while (!punt && !done) {
745 /* verify that a core log exists for this region */
746 if (!raidPtr->regionInfo[regionID].coreLog) {
747 /* Attempt to acquire a parity log. If
748 * acquisition fails, queue remaining work in
749 * data item and move to nextItem. */
750 if (incomingLog)
751 if (*incomingLog) {
752 RF_ASSERT((*incomingLog)->next == NULL);
753 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
754 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
755 *incomingLog = NULL;
756 } else
757 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
758 else
759 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
760 /* Note: AcquireParityLog either returns a log
761 * or enqueues currentItem */
762 }
763 if (!raidPtr->regionInfo[regionID].coreLog)
764 punt = RF_TRUE; /* failed to find a core log */
765 else {
766 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
767 /* verify that the log has room for new
768 * entries */
769 /* if log is full, dump it to disk and grab a
770 * new log */
771 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
772 /* log is full, dump it to disk */
773 if (DumpParityLogToDisk(finish, item))
774 punt = RF_TRUE; /* dump unsuccessful,
775 * blocked on
776 * reintegration */
777 else {
778 /* dump was successful */
779 if (incomingLog)
780 if (*incomingLog) {
781 RF_ASSERT((*incomingLog)->next == NULL);
782 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
783 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
784 *incomingLog = NULL;
785 } else
786 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
787 else
788 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
789 /* if a core log is not
790 * available, must queue work
791 * and return */
792 if (!raidPtr->regionInfo[regionID].coreLog)
793 punt = RF_TRUE; /* blocked on log
794 * availability */
795 }
796 }
797 }
798 /* if we didn't punt on this item, attempt to add a
799 * sector to the core log */
800 if (!punt) {
801 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
802 /* at this point, we have a core log with
803 * enough room for a sector */
804 /* copy a sector into the log */
805 log = raidPtr->regionInfo[regionID].coreLog;
806 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
807 logItem = log->numRecords++;
808 log->records[logItem].parityAddr = item->diskAddress;
809 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
810 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
811 log->records[logItem].parityAddr.numSector = 1;
812 log->records[logItem].operation = item->common->operation;
813 bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
814 item->diskAddress.numSector--;
815 item->diskAddress.startSector++;
816 if (item->diskAddress.numSector == 0)
817 done = RF_TRUE;
818 }
819 }
820
821 if (!punt) {
822 /* Processed this item completely, decrement count of
823 * items to be processed. */
824 RF_ASSERT(item->diskAddress.numSector == 0);
825 RF_LOCK_MUTEX(item->common->mutex);
826 item->common->cnt--;
827 if (item->common->cnt == 0)
828 itemDone = RF_TRUE;
829 else
830 itemDone = RF_FALSE;
831 RF_UNLOCK_MUTEX(item->common->mutex);
832 if (itemDone) {
833 /* Finished processing all log data for this
834 * IO Return structs to free list and invoke
835 * wakeup function. */
836 timer = item->common->startTime; /* grab initial value of
837 * timer */
838 RF_ETIMER_STOP(timer);
839 RF_ETIMER_EVAL(timer);
840 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
841 if (rf_parityLogDebug)
842 printf("[waking process for region %d]\n", item->regionID);
843 wakeFunc = item->common->wakeFunc;
844 wakeArg = item->common->wakeArg;
845 FreeParityLogCommonData(item->common);
846 FreeParityLogData(item);
847 (wakeFunc) (wakeArg, 0);
848 } else
849 FreeParityLogData(item);
850 }
851 }
852 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
853 if (rf_parityLogDebug)
854 printf("[exiting ParityLogAppend]\n");
855 return (0);
856 }
857
858
859 void
860 rf_EnableParityLogging(RF_Raid_t * raidPtr)
861 {
862 int regionID;
863
864 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
865 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
866 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
867 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
868 }
869 if (rf_parityLogDebug)
870 printf("[parity logging enabled]\n");
871 }
872 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
873