rf_paritylog.c revision 1.1 1 /* $NetBSD: rf_paritylog.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* Code for manipulating in-core parity logs
30 *
31 * :
32 * Log: rf_paritylog.c,v
33 * Revision 1.27 1996/07/28 20:31:39 jimz
34 * i386netbsd port
35 * true/false fixup
36 *
37 * Revision 1.26 1996/07/27 23:36:08 jimz
38 * Solaris port of simulator
39 *
40 * Revision 1.25 1996/07/17 21:00:58 jimz
41 * clean up timer interface, tracing
42 *
43 * Revision 1.24 1996/06/11 10:18:59 jimz
44 * AllocParityLogCommonData() was freeing the common pointer immediately
45 * after allocating this. It appeared that this free really belonged
46 * inside one of the failure cases (for backing out), so I moved it
47 * in there.
48 *
49 * Revision 1.23 1996/06/05 18:06:02 jimz
50 * Major code cleanup. The Great Renaming is now done.
51 * Better modularity. Better typing. Fixed a bunch of
52 * synchronization bugs. Made a lot of global stuff
53 * per-desc or per-array. Removed dead code.
54 *
55 * Revision 1.22 1996/06/02 17:31:48 jimz
56 * Moved a lot of global stuff into array structure, where it belongs.
57 * Fixed up paritylogging, pss modules in this manner. Some general
58 * code cleanup. Removed lots of dead code, some dead files.
59 *
60 * Revision 1.21 1996/05/31 22:26:54 jimz
61 * fix a lot of mapping problems, memory allocation problems
62 * found some weird lock issues, fixed 'em
63 * more code cleanup
64 *
65 * Revision 1.20 1996/05/30 23:22:16 jimz
66 * bugfixes of serialization, timing problems
67 * more cleanup
68 *
69 * Revision 1.19 1996/05/30 12:59:18 jimz
70 * make etimer happier, more portable
71 *
72 * Revision 1.18 1996/05/27 18:56:37 jimz
73 * more code cleanup
74 * better typing
75 * compiles in all 3 environments
76 *
77 * Revision 1.17 1996/05/24 04:28:55 jimz
78 * release cleanup ckpt
79 *
80 * Revision 1.16 1996/05/23 21:46:35 jimz
81 * checkpoint in code cleanup (release prep)
82 * lots of types, function names have been fixed
83 *
84 * Revision 1.15 1996/05/23 00:33:23 jimz
85 * code cleanup: move all debug decls to rf_options.c, all extern
86 * debug decls to rf_options.h, all debug vars preceded by rf_
87 *
88 * Revision 1.14 1996/05/20 16:16:59 jimz
89 * switch to rf_{mutex,cond}_{init,destroy}
90 *
91 * Revision 1.13 1996/05/18 19:51:34 jimz
92 * major code cleanup- fix syntax, make some types consistent,
93 * add prototypes, clean out dead code, et cetera
94 *
95 * Revision 1.12 1995/12/12 18:10:06 jimz
96 * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
97 * fix 80-column brain damage in comments
98 *
99 * Revision 1.11 1995/12/06 20:54:44 wvcii
100 * added prototyping
101 *
102 * Revision 1.10 1995/11/30 16:05:37 wvcii
103 * added copyright info
104 *
105 * Revision 1.9 1995/10/08 20:41:28 wvcii
106 * fixed bug in allocation of CommonLogData (was allocating incorrect size)
107 *
108 * Revision 1.8 1995/09/07 15:52:12 jimz
109 * noop compile when INCLUDE_PARITYLOGGING not defined
110 *
111 * Revision 1.7 1995/09/06 19:17:36 wvcii
112 * moved code for reintegration to rf_paritylogDiskMgr.c
113 *
114 * Revision 1.6 95/07/07 00:16:06 wvcii
115 * this version free from deadlock, fails parity verification
116 *
117 * Revision 1.5 1995/06/09 13:14:24 wvcii
118 * code is now nonblocking
119 *
120 * Revision 1.4 95/06/01 17:01:59 wvcii
121 * code debug
122 *
123 * Revision 1.3 95/05/31 13:08:23 wvcii
124 * code debug
125 *
126 * Revision 1.2 95/05/21 15:42:15 wvcii
127 * code debug
128 *
129 * Revision 1.1 95/05/18 10:43:54 wvcii
130 * Initial revision
131 *
132 */
133
134 #include "rf_archs.h"
135
136 #if RF_INCLUDE_PARITYLOGGING > 0
137
138 /*
139 * Append-only log for recording parity "update" and "overwrite" records
140 */
141
142 #include "rf_types.h"
143 #include "rf_threadstuff.h"
144 #include "rf_mcpair.h"
145 #include "rf_raid.h"
146 #include "rf_dag.h"
147 #include "rf_dagfuncs.h"
148 #include "rf_desc.h"
149 #include "rf_layout.h"
150 #include "rf_diskqueue.h"
151 #include "rf_etimer.h"
152 #include "rf_paritylog.h"
153 #include "rf_general.h"
154 #include "rf_threadid.h"
155 #include "rf_map.h"
156 #include "rf_paritylogging.h"
157 #include "rf_paritylogDiskMgr.h"
158 #include "rf_sys.h"
159
160 static RF_CommonLogData_t *AllocParityLogCommonData(RF_Raid_t *raidPtr)
161 {
162 RF_CommonLogData_t *common = NULL;
163 int rc;
164
165 /* Return a struct for holding common parity log information from the free
166 list (rf_parityLogDiskQueue.freeCommonList). If the free list is empty, call
167 RF_Malloc to create a new structure.
168 NON-BLOCKING */
169
170 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
171 if (raidPtr->parityLogDiskQueue.freeCommonList)
172 {
173 common = raidPtr->parityLogDiskQueue.freeCommonList;
174 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
175 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
176 }
177 else
178 {
179 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
180 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
181 rc = rf_mutex_init(&common->mutex);
182 if (rc) {
183 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
184 __LINE__, rc);
185 RF_Free(common, sizeof(RF_CommonLogData_t));
186 common = NULL;
187 }
188 }
189 common->next = NULL;
190 return(common);
191 }
192
193 static void FreeParityLogCommonData(RF_CommonLogData_t *common)
194 {
195 RF_Raid_t *raidPtr;
196
197 /* Insert a single struct for holding parity log information
198 (data) into the free list (rf_parityLogDiskQueue.freeCommonList).
199 NON-BLOCKING */
200
201 raidPtr = common->raidPtr;
202 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
203 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
204 raidPtr->parityLogDiskQueue.freeCommonList = common;
205 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
206 }
207
208 static RF_ParityLogData_t *AllocParityLogData(RF_Raid_t *raidPtr)
209 {
210 RF_ParityLogData_t *data = NULL;
211
212 /* Return a struct for holding parity log information from the free
213 list (rf_parityLogDiskQueue.freeList). If the free list is empty, call
214 RF_Malloc to create a new structure.
215 NON-BLOCKING */
216
217 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
218 if (raidPtr->parityLogDiskQueue.freeDataList)
219 {
220 data = raidPtr->parityLogDiskQueue.freeDataList;
221 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
222 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
223 }
224 else
225 {
226 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
227 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
228 }
229 data->next = NULL;
230 data->prev = NULL;
231 return(data);
232 }
233
234
235 static void FreeParityLogData(RF_ParityLogData_t *data)
236 {
237 RF_ParityLogData_t *nextItem;
238 RF_Raid_t *raidPtr;
239
240 /* Insert a linked list of structs for holding parity log
241 information (data) into the free list (parityLogDiskQueue.freeList).
242 NON-BLOCKING */
243
244 raidPtr = data->common->raidPtr;
245 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
246 while (data)
247 {
248 nextItem = data->next;
249 data->next = raidPtr->parityLogDiskQueue.freeDataList;
250 raidPtr->parityLogDiskQueue.freeDataList = data;
251 data = nextItem;
252 }
253 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
254 }
255
256
257 static void EnqueueParityLogData(
258 RF_ParityLogData_t *data,
259 RF_ParityLogData_t **head,
260 RF_ParityLogData_t **tail)
261 {
262 RF_Raid_t *raidPtr;
263
264 /* Insert an in-core parity log (*data) into the head of
265 a disk queue (*head, *tail).
266 NON-BLOCKING */
267
268 raidPtr = data->common->raidPtr;
269 if (rf_parityLogDebug)
270 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
271 RF_ASSERT(data->prev == NULL);
272 RF_ASSERT(data->next == NULL);
273 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
274 if (*head)
275 {
276 /* insert into head of queue */
277 RF_ASSERT((*head)->prev == NULL);
278 RF_ASSERT((*tail)->next == NULL);
279 data->next = *head;
280 (*head)->prev = data;
281 *head = data;
282 }
283 else
284 {
285 /* insert into empty list */
286 RF_ASSERT(*head == NULL);
287 RF_ASSERT(*tail == NULL);
288 *head = data;
289 *tail = data;
290 }
291 RF_ASSERT((*head)->prev == NULL);
292 RF_ASSERT((*tail)->next == NULL);
293 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
294 }
295
296 static RF_ParityLogData_t *DequeueParityLogData(
297 RF_Raid_t *raidPtr,
298 RF_ParityLogData_t **head,
299 RF_ParityLogData_t **tail,
300 int ignoreLocks)
301 {
302 RF_ParityLogData_t *data;
303
304 /* Remove and return an in-core parity log from the tail of
305 a disk queue (*head, *tail).
306 NON-BLOCKING */
307
308 /* remove from tail, preserving FIFO order */
309 if (!ignoreLocks)
310 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
311 data = *tail;
312 if (data)
313 {
314 if (*head == *tail)
315 {
316 /* removing last item from queue */
317 *head = NULL;
318 *tail = NULL;
319 }
320 else
321 {
322 *tail = (*tail)->prev;
323 (*tail)->next = NULL;
324 RF_ASSERT((*head)->prev == NULL);
325 RF_ASSERT((*tail)->next == NULL);
326 }
327 data->next = NULL;
328 data->prev = NULL;
329 if (rf_parityLogDebug)
330 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
331 }
332 if (*head)
333 {
334 RF_ASSERT((*head)->prev == NULL);
335 RF_ASSERT((*tail)->next == NULL);
336 }
337 if (!ignoreLocks)
338 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
339 return(data);
340 }
341
342
343 static void RequeueParityLogData(
344 RF_ParityLogData_t *data,
345 RF_ParityLogData_t **head,
346 RF_ParityLogData_t **tail)
347 {
348 RF_Raid_t *raidPtr;
349
350 /* Insert an in-core parity log (*data) into the tail of
351 a disk queue (*head, *tail).
352 NON-BLOCKING */
353
354 raidPtr = data->common->raidPtr;
355 RF_ASSERT(data);
356 if (rf_parityLogDebug)
357 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
358 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
359 if (*tail)
360 {
361 /* append to tail of list */
362 data->prev = *tail;
363 data->next = NULL;
364 (*tail)->next = data;
365 *tail = data;
366 }
367 else
368 {
369 /* inserting into an empty list */
370 *head = data;
371 *tail = data;
372 (*head)->prev = NULL;
373 (*tail)->next = NULL;
374 }
375 RF_ASSERT((*head)->prev == NULL);
376 RF_ASSERT((*tail)->next == NULL);
377 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
378 }
379
380 RF_ParityLogData_t *rf_CreateParityLogData(
381 RF_ParityRecordType_t operation,
382 RF_PhysDiskAddr_t *pda,
383 caddr_t bufPtr,
384 RF_Raid_t *raidPtr,
385 int (*wakeFunc)(RF_DagNode_t *node, int status),
386 void *wakeArg,
387 RF_AccTraceEntry_t *tracerec,
388 RF_Etimer_t startTime)
389 {
390 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
391 RF_CommonLogData_t *common;
392 RF_PhysDiskAddr_t *diskAddress;
393 int boundary, offset = 0;
394
395 /* Return an initialized struct of info to be logged.
396 Build one item per physical disk address, one item per region.
397
398 NON-BLOCKING */
399
400 diskAddress = pda;
401 common = AllocParityLogCommonData(raidPtr);
402 RF_ASSERT(common);
403
404 common->operation = operation;
405 common->bufPtr = bufPtr;
406 common->raidPtr = raidPtr;
407 common->wakeFunc = wakeFunc;
408 common->wakeArg = wakeArg;
409 common->tracerec = tracerec;
410 common->startTime = startTime;
411 common->cnt = 0;
412
413 if (rf_parityLogDebug)
414 printf("[entering CreateParityLogData]\n");
415 while (diskAddress)
416 {
417 common->cnt++;
418 data = AllocParityLogData(raidPtr);
419 RF_ASSERT(data);
420 data->common = common;
421 data->next = NULL;
422 data->prev = NULL;
423 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
424 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1))
425 {
426 /* disk address does not cross a region boundary */
427 data->diskAddress = *diskAddress;
428 data->bufOffset = offset;
429 offset = offset + diskAddress->numSector;
430 EnqueueParityLogData(data, &resultHead, &resultTail);
431 /* adjust disk address */
432 diskAddress = diskAddress->next;
433 }
434 else
435 {
436 /* disk address crosses a region boundary */
437 /* find address where region is crossed */
438 boundary = 0;
439 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
440 boundary++;
441
442 /* enter data before the boundary */
443 data->diskAddress = *diskAddress;
444 data->diskAddress.numSector = boundary;
445 data->bufOffset = offset;
446 offset += boundary;
447 EnqueueParityLogData(data, &resultHead, &resultTail);
448 /* adjust disk address */
449 diskAddress->startSector += boundary;
450 diskAddress->numSector -= boundary;
451 }
452 }
453 if (rf_parityLogDebug)
454 printf("[leaving CreateParityLogData]\n");
455 return(resultHead);
456 }
457
458
459 RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(
460 RF_Raid_t *raidPtr,
461 int regionID,
462 RF_ParityLogData_t **head,
463 RF_ParityLogData_t **tail,
464 int ignoreLocks)
465 {
466 RF_ParityLogData_t *w;
467
468 /* Remove and return an in-core parity log from a specified region (regionID).
469 If a matching log is not found, return NULL.
470
471 NON-BLOCKING.
472 */
473
474 /* walk backward through a list, looking for an entry with a matching region ID */
475 if (!ignoreLocks)
476 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
477 w = (*tail);
478 while (w)
479 {
480 if (w->regionID == regionID)
481 {
482 /* remove an element from the list */
483 if (w == *tail)
484 {
485 if (*head == *tail)
486 {
487 /* removing only element in the list */
488 *head = NULL;
489 *tail = NULL;
490 }
491 else
492 {
493 /* removing last item in the list */
494 *tail = (*tail)->prev;
495 (*tail)->next = NULL;
496 RF_ASSERT((*head)->prev == NULL);
497 RF_ASSERT((*tail)->next == NULL);
498 }
499 }
500 else
501 {
502 if (w == *head)
503 {
504 /* removing first item in the list */
505 *head = (*head)->next;
506 (*head)->prev = NULL;
507 RF_ASSERT((*head)->prev == NULL);
508 RF_ASSERT((*tail)->next == NULL);
509 }
510 else
511 {
512 /* removing an item from the middle of the list */
513 w->prev->next = w->next;
514 w->next->prev = w->prev;
515 RF_ASSERT((*head)->prev == NULL);
516 RF_ASSERT((*tail)->next == NULL);
517 }
518 }
519 w->prev = NULL;
520 w->next = NULL;
521 if (rf_parityLogDebug)
522 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",w->regionID,(int)w->diskAddress.raidAddress,(int) w->diskAddress.numSector);
523 return(w);
524 }
525 else
526 w = w->prev;
527 }
528 if (!ignoreLocks)
529 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
530 return(NULL);
531 }
532
533 static RF_ParityLogData_t *DequeueMatchingLogData(
534 RF_Raid_t *raidPtr,
535 RF_ParityLogData_t **head,
536 RF_ParityLogData_t **tail)
537 {
538 RF_ParityLogData_t *logDataList, *logData;
539 int regionID;
540
541 /* Remove and return an in-core parity log from the tail of
542 a disk queue (*head, *tail). Then remove all matching
543 (identical regionIDs) logData and return as a linked list.
544
545 NON-BLOCKING
546 */
547
548 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
549 if (logDataList)
550 {
551 regionID = logDataList->regionID;
552 logData = logDataList;
553 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
554 while (logData->next)
555 {
556 logData = logData->next;
557 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
558 }
559 }
560 return(logDataList);
561 }
562
563
564 static RF_ParityLog_t *AcquireParityLog(
565 RF_ParityLogData_t *logData,
566 int finish)
567 {
568 RF_ParityLog_t *log = NULL;
569 RF_Raid_t *raidPtr;
570
571 /* Grab a log buffer from the pool and return it.
572 If no buffers are available, return NULL.
573 NON-BLOCKING
574 */
575 raidPtr = logData->common->raidPtr;
576 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
577 if (raidPtr->parityLogPool.parityLogs)
578 {
579 log = raidPtr->parityLogPool.parityLogs;
580 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
581 log->regionID = logData->regionID;
582 log->numRecords = 0;
583 log->next = NULL;
584 raidPtr->logsInUse++;
585 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
586 }
587 else
588 {
589 /* no logs available, so place ourselves on the queue of work waiting on log buffers
590 this is done while parityLogPool.mutex is held, to ensure synchronization
591 with ReleaseParityLogs.
592 */
593 if (rf_parityLogDebug)
594 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
595 if (finish)
596 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
597 else
598 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
599 }
600 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
601 return(log);
602 }
603
604 void rf_ReleaseParityLogs(
605 RF_Raid_t *raidPtr,
606 RF_ParityLog_t *firstLog)
607 {
608 RF_ParityLogData_t *logDataList;
609 RF_ParityLog_t *log, *lastLog;
610 int cnt;
611
612 /* Insert a linked list of parity logs (firstLog) to
613 the free list (parityLogPool.parityLogPool)
614
615 NON-BLOCKING.
616 */
617
618 RF_ASSERT(firstLog);
619
620 /* Before returning logs to global free list, service all
621 requests which are blocked on logs. Holding mutexes for parityLogPool and parityLogDiskQueue
622 forces synchronization with AcquireParityLog().
623 */
624 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
625 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
626 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
627 log = firstLog;
628 if (firstLog)
629 firstLog = firstLog->next;
630 log->numRecords = 0;
631 log->next = NULL;
632 while (logDataList && log)
633 {
634 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
635 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
636 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
637 if (rf_parityLogDebug)
638 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
639 if (log == NULL)
640 {
641 log = firstLog;
642 if (firstLog)
643 {
644 firstLog = firstLog->next;
645 log->numRecords = 0;
646 log->next = NULL;
647 }
648 }
649 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
650 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
651 if (log)
652 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
653 }
654 /* return remaining logs to pool */
655 if (log)
656 {
657 log->next = firstLog;
658 firstLog = log;
659 }
660 if (firstLog)
661 {
662 lastLog = firstLog;
663 raidPtr->logsInUse--;
664 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
665 while (lastLog->next)
666 {
667 lastLog = lastLog->next;
668 raidPtr->logsInUse--;
669 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
670 }
671 lastLog->next = raidPtr->parityLogPool.parityLogs;
672 raidPtr->parityLogPool.parityLogs = firstLog;
673 cnt = 0;
674 log = raidPtr->parityLogPool.parityLogs;
675 while (log)
676 {
677 cnt++;
678 log = log->next;
679 }
680 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
681 }
682 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
683 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
684 }
685
686 static void ReintLog(
687 RF_Raid_t *raidPtr,
688 int regionID,
689 RF_ParityLog_t *log)
690 {
691 RF_ASSERT(log);
692
693 /* Insert an in-core parity log (log) into the disk queue of reintegration
694 work. Set the flag (reintInProgress) for the specified region (regionID)
695 to indicate that reintegration is in progress for this region.
696 NON-BLOCKING
697 */
698
699 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
700 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint complete */
701
702 if (rf_parityLogDebug)
703 printf("[requesting reintegration of region %d]\n", log->regionID);
704 /* move record to reintegration queue */
705 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
706 log->next = raidPtr->parityLogDiskQueue.reintQueue;
707 raidPtr->parityLogDiskQueue.reintQueue = log;
708 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
709 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
710 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
711 }
712
713 static void FlushLog(
714 RF_Raid_t *raidPtr,
715 RF_ParityLog_t *log)
716 {
717 /* insert a core log (log) into a list of logs (parityLogDiskQueue.flushQueue)
718 waiting to be written to disk.
719 NON-BLOCKING
720 */
721
722 RF_ASSERT(log);
723 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
724 RF_ASSERT(log->next == NULL);
725 /* move log to flush queue */
726 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
727 log->next = raidPtr->parityLogDiskQueue.flushQueue;
728 raidPtr->parityLogDiskQueue.flushQueue = log;
729 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
730 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
731 }
732
733 static int DumpParityLogToDisk(
734 int finish,
735 RF_ParityLogData_t *logData)
736 {
737 int i, diskCount, regionID = logData->regionID;
738 RF_ParityLog_t *log;
739 RF_Raid_t *raidPtr;
740
741 raidPtr = logData->common->raidPtr;
742
743 /* Move a core log to disk. If the log disk is full, initiate
744 reintegration.
745
746 Return (0) if we can enqueue the dump immediately, otherwise
747 return (1) to indicate we are blocked on reintegration and
748 control of the thread should be relinquished.
749
750 Caller must hold regionInfo[regionID].mutex
751
752 NON-BLOCKING
753 */
754
755 if (rf_parityLogDebug)
756 printf("[dumping parity log to disk, region %d]\n", regionID);
757 log = raidPtr->regionInfo[regionID].coreLog;
758 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
759 RF_ASSERT(log->next == NULL);
760
761 /* if reintegration is in progress, must queue work */
762 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
763 if (raidPtr->regionInfo[regionID].reintInProgress)
764 {
765 /* Can not proceed since this region is currently being reintegrated.
766 We can not block, so queue remaining work and return */
767 if (rf_parityLogDebug)
768 printf("[region %d waiting on reintegration]\n",regionID);
769 /* XXX not sure about the use of finish - shouldn't this always be "Enqueue"? */
770 if (finish)
771 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
772 else
773 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
774 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
775 return(1); /* relenquish control of this thread */
776 }
777 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
778 raidPtr->regionInfo[regionID].coreLog = NULL;
779 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
780 /* IMPORTANT!! this loop bound assumes region disk holds an integral number of core logs */
781 {
782 /* update disk map for this region */
783 diskCount = raidPtr->regionInfo[regionID].diskCount;
784 for (i = 0; i < raidPtr->numSectorsPerLog; i++)
785 {
786 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
787 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
788 }
789 log->diskOffset = diskCount;
790 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
791 FlushLog(raidPtr, log);
792 }
793 else
794 {
795 /* no room for log on disk, send it to disk manager and request reintegration */
796 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
797 ReintLog(raidPtr, regionID, log);
798 }
799 if (rf_parityLogDebug)
800 printf("[finished dumping parity log to disk, region %d]\n", regionID);
801 return(0);
802 }
803
804 int rf_ParityLogAppend(
805 RF_ParityLogData_t *logData,
806 int finish,
807 RF_ParityLog_t **incomingLog,
808 int clearReintFlag)
809 {
810 int regionID, logItem, itemDone;
811 RF_ParityLogData_t *item;
812 int punt, done = RF_FALSE;
813 RF_ParityLog_t *log;
814 RF_Raid_t *raidPtr;
815 RF_Etimer_t timer;
816 int (*wakeFunc)(RF_DagNode_t *node, int status);
817 void *wakeArg;
818
819 /* Add parity to the appropriate log, one sector at a time.
820 This routine is called is called by dag functions ParityLogUpdateFunc
821 and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
822
823 Parity to be logged is contained in a linked-list (logData). When
824 this routine returns, every sector in the list will be in one of
825 three places:
826 1) entered into the parity log
827 2) queued, waiting on reintegration
828 3) queued, waiting on a core log
829
830 Blocked work is passed to the ParityLoggingDiskManager for completion.
831 Later, as conditions which required the block are removed, the work
832 reenters this routine with the "finish" parameter set to "RF_TRUE."
833
834 NON-BLOCKING
835 */
836
837 raidPtr = logData->common->raidPtr;
838 /* lock the region for the first item in logData */
839 RF_ASSERT(logData != NULL);
840 regionID = logData->regionID;
841 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
842 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
843
844 if (clearReintFlag)
845 {
846 /* Enable flushing for this region. Holding both locks provides
847 a synchronization barrier with DumpParityLogToDisk
848 */
849 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
850 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
851 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
852 raidPtr->regionInfo[regionID].diskCount = 0;
853 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
854 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
855 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
856 }
857
858 /* process each item in logData */
859 while (logData)
860 {
861 /* remove an item from logData */
862 item = logData;
863 logData = logData->next;
864 item->next = NULL;
865 item->prev = NULL;
866
867 if (rf_parityLogDebug)
868 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n",item->regionID,(int)item->diskAddress.raidAddress, (int)item->diskAddress.numSector);
869
870 /* see if we moved to a new region */
871 if (regionID != item->regionID)
872 {
873 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
874 regionID = item->regionID;
875 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
876 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
877 }
878
879 punt = RF_FALSE; /* Set to RF_TRUE if work is blocked. This can happen in one of two ways:
880 1) no core log (AcquireParityLog)
881 2) waiting on reintegration (DumpParityLogToDisk)
882 If punt is RF_TRUE, the dataItem was queued, so skip to next item.
883 */
884
885 /* process item, one sector at a time, until all sectors processed or we punt */
886 if (item->diskAddress.numSector > 0)
887 done = RF_FALSE;
888 else
889 RF_ASSERT(0);
890 while (!punt && !done)
891 {
892 /* verify that a core log exists for this region */
893 if (!raidPtr->regionInfo[regionID].coreLog)
894 {
895 /* Attempt to acquire a parity log.
896 If acquisition fails, queue remaining work in data item and move to nextItem.
897 */
898 if (incomingLog)
899 if (*incomingLog)
900 {
901 RF_ASSERT((*incomingLog)->next == NULL);
902 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
903 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
904 *incomingLog = NULL;
905 }
906 else
907 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
908 else
909 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
910 /* Note: AcquireParityLog either returns a log or enqueues currentItem */
911 }
912 if (!raidPtr->regionInfo[regionID].coreLog)
913 punt = RF_TRUE; /* failed to find a core log */
914 else
915 {
916 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
917 /* verify that the log has room for new entries */
918 /* if log is full, dump it to disk and grab a new log */
919 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog)
920 {
921 /* log is full, dump it to disk */
922 if (DumpParityLogToDisk(finish, item))
923 punt = RF_TRUE; /* dump unsuccessful, blocked on reintegration */
924 else
925 {
926 /* dump was successful */
927 if (incomingLog)
928 if (*incomingLog)
929 {
930 RF_ASSERT((*incomingLog)->next == NULL);
931 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
932 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
933 *incomingLog = NULL;
934 }
935 else
936 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
937 else
938 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
939 /* if a core log is not available, must queue work and return */
940 if (!raidPtr->regionInfo[regionID].coreLog)
941 punt = RF_TRUE; /* blocked on log availability */
942 }
943 }
944 }
945 /* if we didn't punt on this item, attempt to add a sector to the core log */
946 if (!punt)
947 {
948 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
949 /* at this point, we have a core log with enough room for a sector */
950 /* copy a sector into the log */
951 log = raidPtr->regionInfo[regionID].coreLog;
952 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
953 logItem = log->numRecords++;
954 log->records[logItem].parityAddr = item->diskAddress;
955 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
956 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
957 log->records[logItem].parityAddr.numSector = 1;
958 log->records[logItem].operation = item->common->operation;
959 bcopy((item->common->bufPtr + (item->bufOffset++ * (1<<item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1<<item->common->raidPtr->logBytesPerSector)), (1<<item->common->raidPtr->logBytesPerSector));
960 item->diskAddress.numSector--;
961 item->diskAddress.startSector++;
962 if (item->diskAddress.numSector == 0)
963 done = RF_TRUE;
964 }
965 }
966
967 if (!punt)
968 {
969 /* Processed this item completely, decrement count of items
970 to be processed.
971 */
972 RF_ASSERT(item->diskAddress.numSector == 0);
973 RF_LOCK_MUTEX(item->common->mutex);
974 item->common->cnt--;
975 if (item->common->cnt == 0)
976 itemDone = RF_TRUE;
977 else
978 itemDone = RF_FALSE;
979 RF_UNLOCK_MUTEX(item->common->mutex);
980 if (itemDone)
981 {
982 /* Finished processing all log data for this IO
983 Return structs to free list and invoke wakeup function.
984 */
985 timer = item->common->startTime; /* grab initial value of timer */
986 RF_ETIMER_STOP(timer);
987 RF_ETIMER_EVAL(timer);
988 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
989 if (rf_parityLogDebug)
990 printf("[waking process for region %d]\n", item->regionID);
991 wakeFunc = item->common->wakeFunc;
992 wakeArg = item->common->wakeArg;
993 FreeParityLogCommonData(item->common);
994 FreeParityLogData(item);
995 (wakeFunc)(wakeArg, 0);
996 }
997 else
998 FreeParityLogData(item);
999 }
1000 }
1001 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1002 if (rf_parityLogDebug)
1003 printf("[exiting ParityLogAppend]\n");
1004 return(0);
1005 }
1006
1007
1008 void rf_EnableParityLogging(RF_Raid_t *raidPtr)
1009 {
1010 int regionID;
1011
1012 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
1013 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1014 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
1015 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1016 }
1017 if (rf_parityLogDebug)
1018 printf("[parity logging enabled]\n");
1019 }
1020
1021 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
1022