rf_paritylogDiskMgr.c revision 1.2 1 /* $NetBSD: rf_paritylogDiskMgr.c,v 1.2 1999/01/26 02:33:59 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28 /* Code for flushing and reintegration operations related to parity logging.
29 *
30 */
31
32 #include "rf_archs.h"
33
34 #if RF_INCLUDE_PARITYLOGGING > 0
35
36 #include "rf_types.h"
37 #include "rf_threadstuff.h"
38 #include "rf_mcpair.h"
39 #include "rf_raid.h"
40 #include "rf_dag.h"
41 #include "rf_dagfuncs.h"
42 #include "rf_desc.h"
43 #include "rf_layout.h"
44 #include "rf_diskqueue.h"
45 #include "rf_paritylog.h"
46 #include "rf_general.h"
47 #include "rf_threadid.h"
48 #include "rf_etimer.h"
49 #include "rf_paritylogging.h"
50 #include "rf_engine.h"
51 #include "rf_dagutils.h"
52 #include "rf_map.h"
53 #include "rf_parityscan.h"
54 #include "rf_sys.h"
55
56 #include "rf_paritylogDiskMgr.h"
57
58 static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
59
60 static caddr_t AcquireReintBuffer(pool)
61 RF_RegionBufferQueue_t *pool;
62 {
63 caddr_t bufPtr = NULL;
64
65 /* Return a region buffer from the free list (pool).
66 If the free list is empty, WAIT.
67 BLOCKING */
68
69 RF_LOCK_MUTEX(pool->mutex);
70 if (pool->availableBuffers > 0) {
71 bufPtr = pool->buffers[pool->availBuffersIndex];
72 pool->availableBuffers--;
73 pool->availBuffersIndex++;
74 if (pool->availBuffersIndex == pool->totalBuffers)
75 pool->availBuffersIndex = 0;
76 RF_UNLOCK_MUTEX(pool->mutex);
77 }
78 else {
79 RF_PANIC(); /* should never happen in currect config, single reint */
80 RF_WAIT_COND(pool->cond, pool->mutex);
81 }
82 return(bufPtr);
83 }
84
85 static void ReleaseReintBuffer(
86 RF_RegionBufferQueue_t *pool,
87 caddr_t bufPtr)
88 {
89 /* Insert a region buffer (bufPtr) into the free list (pool).
90 NON-BLOCKING */
91
92 RF_LOCK_MUTEX(pool->mutex);
93 pool->availableBuffers++;
94 pool->buffers[pool->emptyBuffersIndex] = bufPtr;
95 pool->emptyBuffersIndex++;
96 if (pool->emptyBuffersIndex == pool->totalBuffers)
97 pool->emptyBuffersIndex = 0;
98 RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
99 RF_UNLOCK_MUTEX(pool->mutex);
100 RF_SIGNAL_COND(pool->cond);
101 }
102
103
104
105 static void ReadRegionLog(
106 RF_RegionId_t regionID,
107 RF_MCPair_t *rrd_mcpair,
108 caddr_t regionBuffer,
109 RF_Raid_t *raidPtr,
110 RF_DagHeader_t **rrd_dag_h,
111 RF_AllocListElem_t **rrd_alloclist,
112 RF_PhysDiskAddr_t **rrd_pda)
113 {
114 /* Initiate the read a region log from disk. Once initiated, return
115 to the calling routine.
116
117 NON-BLOCKING
118 */
119
120 RF_AccTraceEntry_t tracerec;
121 RF_DagNode_t *rrd_rdNode;
122
123 /* create DAG to read region log from disk */
124 rf_MakeAllocList(*rrd_alloclist);
125 *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
126 "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
127
128 /* create and initialize PDA for the core log */
129 /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
130 *rrd_pda = rf_AllocPDAList(1);
131 rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
132 (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
133
134 if ((*rrd_pda)->next) {
135 (*rrd_pda)->next = NULL;
136 printf("set rrd_pda->next to NULL\n");
137 }
138
139 /* initialize DAG parameters */
140 bzero((char *)&tracerec,sizeof(tracerec));
141 (*rrd_dag_h)->tracerec = &tracerec;
142 rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
143 rrd_rdNode->params[0].p = *rrd_pda;
144 /* rrd_rdNode->params[1] = regionBuffer; */
145 rrd_rdNode->params[2].v = 0;
146 rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
147
148 /* launch region log read dag */
149 rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
150 (void *) rrd_mcpair);
151 }
152
153
154
155 static void WriteCoreLog(
156 RF_ParityLog_t *log,
157 RF_MCPair_t *fwr_mcpair,
158 RF_Raid_t *raidPtr,
159 RF_DagHeader_t **fwr_dag_h,
160 RF_AllocListElem_t **fwr_alloclist,
161 RF_PhysDiskAddr_t **fwr_pda)
162 {
163 RF_RegionId_t regionID = log->regionID;
164 RF_AccTraceEntry_t tracerec;
165 RF_SectorNum_t regionOffset;
166 RF_DagNode_t *fwr_wrNode;
167
168 /* Initiate the write of a core log to a region log disk.
169 Once initiated, return to the calling routine.
170
171 NON-BLOCKING
172 */
173
174 /* create DAG to write a core log to a region log disk */
175 rf_MakeAllocList(*fwr_alloclist);
176 *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
177 "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
178
179 /* create and initialize PDA for the region log */
180 /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
181 *fwr_pda = rf_AllocPDAList(1);
182 regionOffset = log->diskOffset;
183 rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
184 (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
185
186 /* initialize DAG parameters */
187 bzero((char *)&tracerec,sizeof(tracerec));
188 (*fwr_dag_h)->tracerec = &tracerec;
189 fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
190 fwr_wrNode->params[0].p = *fwr_pda;
191 /* fwr_wrNode->params[1] = log->bufPtr; */
192 fwr_wrNode->params[2].v = 0;
193 fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
194
195 /* launch the dag to write the core log to disk */
196 rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
197 (void *) fwr_mcpair);
198 }
199
200
201 static void ReadRegionParity(
202 RF_RegionId_t regionID,
203 RF_MCPair_t *prd_mcpair,
204 caddr_t parityBuffer,
205 RF_Raid_t *raidPtr,
206 RF_DagHeader_t **prd_dag_h,
207 RF_AllocListElem_t **prd_alloclist,
208 RF_PhysDiskAddr_t **prd_pda)
209 {
210 /* Initiate the read region parity from disk.
211 Once initiated, return to the calling routine.
212
213 NON-BLOCKING
214 */
215
216 RF_AccTraceEntry_t tracerec;
217 RF_DagNode_t *prd_rdNode;
218
219 /* create DAG to read region parity from disk */
220 rf_MakeAllocList(*prd_alloclist);
221 *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
222 "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
223
224 /* create and initialize PDA for region parity */
225 /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
226 *prd_pda = rf_AllocPDAList(1);
227 rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
228 if (rf_parityLogDebug)
229 printf("[reading %d sectors of parity from region %d]\n",
230 (int)(*prd_pda)->numSector, regionID);
231 if ((*prd_pda)->next) {
232 (*prd_pda)->next = NULL;
233 printf("set prd_pda->next to NULL\n");
234 }
235
236 /* initialize DAG parameters */
237 bzero((char *)&tracerec,sizeof(tracerec));
238 (*prd_dag_h)->tracerec = &tracerec;
239 prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
240 prd_rdNode->params[0].p = *prd_pda;
241 prd_rdNode->params[1].p = parityBuffer;
242 prd_rdNode->params[2].v = 0;
243 prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
244 if (rf_validateDAGDebug)
245 rf_ValidateDAG(*prd_dag_h);
246 /* launch region parity read dag */
247 rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
248 (void *) prd_mcpair);
249 }
250
251 static void WriteRegionParity(
252 RF_RegionId_t regionID,
253 RF_MCPair_t *pwr_mcpair,
254 caddr_t parityBuffer,
255 RF_Raid_t *raidPtr,
256 RF_DagHeader_t **pwr_dag_h,
257 RF_AllocListElem_t **pwr_alloclist,
258 RF_PhysDiskAddr_t **pwr_pda)
259 {
260 /* Initiate the write of region parity to disk.
261 Once initiated, return to the calling routine.
262
263 NON-BLOCKING
264 */
265
266 RF_AccTraceEntry_t tracerec;
267 RF_DagNode_t *pwr_wrNode;
268
269 /* create DAG to write region log from disk */
270 rf_MakeAllocList(*pwr_alloclist);
271 *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
272 "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
273
274 /* create and initialize PDA for region parity */
275 /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
276 *pwr_pda = rf_AllocPDAList(1);
277 rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
278
279 /* initialize DAG parameters */
280 bzero((char *)&tracerec,sizeof(tracerec));
281 (*pwr_dag_h)->tracerec = &tracerec;
282 pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
283 pwr_wrNode->params[0].p = *pwr_pda;
284 /* pwr_wrNode->params[1] = parityBuffer; */
285 pwr_wrNode->params[2].v = 0;
286 pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
287
288 /* launch the dag to write region parity to disk */
289 rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
290 (void *) pwr_mcpair);
291 }
292
293 static void FlushLogsToDisk(
294 RF_Raid_t *raidPtr,
295 RF_ParityLog_t *logList)
296 {
297 /* Flush a linked list of core logs to the log disk.
298 Logs contain the disk location where they should be
299 written. Logs were written in FIFO order and that
300 order must be preserved.
301
302 Recommended optimizations:
303 1) allow multiple flushes to occur simultaneously
304 2) coalesce contiguous flush operations
305
306 BLOCKING
307 */
308
309 RF_ParityLog_t *log;
310 RF_RegionId_t regionID;
311 RF_MCPair_t *fwr_mcpair;
312 RF_DagHeader_t *fwr_dag_h;
313 RF_AllocListElem_t *fwr_alloclist;
314 RF_PhysDiskAddr_t *fwr_pda;
315
316 fwr_mcpair = rf_AllocMCPair();
317 RF_LOCK_MUTEX(fwr_mcpair->mutex);
318
319 RF_ASSERT(logList);
320 log = logList;
321 while (log)
322 {
323 regionID = log->regionID;
324
325 /* create and launch a DAG to write the core log */
326 if (rf_parityLogDebug)
327 printf("[initiating write of core log for region %d]\n", regionID);
328 fwr_mcpair->flag = RF_FALSE;
329 WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
330
331 /* wait for the DAG to complete */
332 while (!fwr_mcpair->flag)
333 RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
334 if (fwr_dag_h->status != rf_enable)
335 {
336 RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
337 RF_ASSERT(0);
338 }
339
340 /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
341 rf_FreePhysDiskAddr(fwr_pda);
342 rf_FreeDAG(fwr_dag_h);
343 rf_FreeAllocList(fwr_alloclist);
344
345 log = log->next;
346 }
347 RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
348 rf_FreeMCPair(fwr_mcpair);
349 rf_ReleaseParityLogs(raidPtr, logList);
350 }
351
352 static void ReintegrateRegion(
353 RF_Raid_t *raidPtr,
354 RF_RegionId_t regionID,
355 RF_ParityLog_t *coreLog)
356 {
357 RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair;
358 RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
359 RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
360 RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
361 caddr_t parityBuffer, regionBuffer=NULL;
362
363 /* Reintegrate a region (regionID).
364 1. acquire region and parity buffers
365 2. read log from disk
366 3. read parity from disk
367 4. apply log to parity
368 5. apply core log to parity
369 6. write new parity to disk
370
371 BLOCKING
372 */
373
374 if (rf_parityLogDebug)
375 printf("[reintegrating region %d]\n", regionID);
376
377 /* initiate read of region parity */
378 if (rf_parityLogDebug)
379 printf("[initiating read of parity for region %d]\n", regionID);
380 parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
381 prd_mcpair = rf_AllocMCPair();
382 RF_LOCK_MUTEX(prd_mcpair->mutex);
383 prd_mcpair->flag = RF_FALSE;
384 ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
385
386 /* if region log nonempty, initiate read */
387 if (raidPtr->regionInfo[regionID].diskCount > 0)
388 {
389 if (rf_parityLogDebug)
390 printf("[initiating read of disk log for region %d]\n", regionID);
391 regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
392 rrd_mcpair = rf_AllocMCPair();
393 RF_LOCK_MUTEX(rrd_mcpair->mutex);
394 rrd_mcpair->flag = RF_FALSE;
395 ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
396 }
397
398 /* wait on read of region parity to complete */
399 while (!prd_mcpair->flag) {
400 RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
401 }
402 RF_UNLOCK_MUTEX(prd_mcpair->mutex);
403 if (prd_dag_h->status != rf_enable)
404 {
405 RF_ERRORMSG("Unable to read parity from disk\n");
406 /* add code to fail the parity disk */
407 RF_ASSERT(0);
408 }
409
410 /* apply core log to parity */
411 /* if (coreLog)
412 ApplyLogsToParity(coreLog, parityBuffer); */
413
414 if (raidPtr->regionInfo[regionID].diskCount > 0)
415 {
416 /* wait on read of region log to complete */
417 while (!rrd_mcpair->flag)
418 RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
419 RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
420 if (rrd_dag_h->status != rf_enable)
421 {
422 RF_ERRORMSG("Unable to read region log from disk\n");
423 /* add code to fail the log disk */
424 RF_ASSERT(0);
425 }
426 /* apply region log to parity */
427 /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
428 /* release resources associated with region log */
429 /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
430 rf_FreePhysDiskAddr(rrd_pda);
431 rf_FreeDAG(rrd_dag_h);
432 rf_FreeAllocList(rrd_alloclist);
433 rf_FreeMCPair(rrd_mcpair);
434 ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
435 }
436
437 /* write reintegrated parity to disk */
438 if (rf_parityLogDebug)
439 printf("[initiating write of parity for region %d]\n", regionID);
440 pwr_mcpair = rf_AllocMCPair();
441 RF_LOCK_MUTEX(pwr_mcpair->mutex);
442 pwr_mcpair->flag = RF_FALSE;
443 WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
444 while (!pwr_mcpair->flag)
445 RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
446 RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
447 if (pwr_dag_h->status != rf_enable)
448 {
449 RF_ERRORMSG("Unable to write parity to disk\n");
450 /* add code to fail the parity disk */
451 RF_ASSERT(0);
452 }
453
454 /* release resources associated with read of old parity */
455 /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
456 rf_FreePhysDiskAddr(prd_pda);
457 rf_FreeDAG(prd_dag_h);
458 rf_FreeAllocList(prd_alloclist);
459 rf_FreeMCPair(prd_mcpair);
460
461 /* release resources associated with write of new parity */
462 ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
463 /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
464 rf_FreePhysDiskAddr(pwr_pda);
465 rf_FreeDAG(pwr_dag_h);
466 rf_FreeAllocList(pwr_alloclist);
467 rf_FreeMCPair(pwr_mcpair);
468
469 if (rf_parityLogDebug)
470 printf("[finished reintegrating region %d]\n", regionID);
471 }
472
473
474
475 static void ReintegrateLogs(
476 RF_Raid_t *raidPtr,
477 RF_ParityLog_t *logList)
478 {
479 RF_ParityLog_t *log, *freeLogList = NULL;
480 RF_ParityLogData_t *logData, *logDataList;
481 RF_RegionId_t regionID;
482
483 RF_ASSERT(logList);
484 while (logList)
485 {
486 log = logList;
487 logList = logList->next;
488 log->next = NULL;
489 regionID = log->regionID;
490 ReintegrateRegion(raidPtr, regionID, log);
491 log->numRecords = 0;
492
493 /* remove all items which are blocked on reintegration of this region */
494 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
495 logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
496 logDataList = logData;
497 while (logData)
498 {
499 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
500 logData = logData->next;
501 }
502 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
503
504 /* process blocked log data and clear reintInProgress flag for this region */
505 if (logDataList)
506 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
507 else
508 {
509 /* Enable flushing for this region. Holding both locks provides
510 a synchronization barrier with DumpParityLogToDisk
511 */
512 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
513 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
514 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
515 raidPtr->regionInfo[regionID].diskCount = 0;
516 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
517 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
518 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
519 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
520 }
521 /* if log wasn't used, attach it to the list of logs to be returned */
522 if (log)
523 {
524 log->next = freeLogList;
525 freeLogList = log;
526 }
527 }
528 if (freeLogList)
529 rf_ReleaseParityLogs(raidPtr, freeLogList);
530 }
531
532 int rf_ShutdownLogging(RF_Raid_t *raidPtr)
533 {
534 /* shutdown parity logging
535 1) disable parity logging in all regions
536 2) reintegrate all regions
537 */
538
539 RF_SectorCount_t diskCount;
540 RF_RegionId_t regionID;
541 RF_ParityLog_t *log;
542
543 if (rf_parityLogDebug)
544 printf("[shutting down parity logging]\n");
545 /* Since parity log maps are volatile, we must reintegrate all regions. */
546 if (rf_forceParityLogReint) {
547 for (regionID = 0; regionID < rf_numParityRegions; regionID++)
548 {
549 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
550 raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
551 log = raidPtr->regionInfo[regionID].coreLog;
552 raidPtr->regionInfo[regionID].coreLog = NULL;
553 diskCount = raidPtr->regionInfo[regionID].diskCount;
554 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
555 if (diskCount > 0 || log != NULL)
556 ReintegrateRegion(raidPtr, regionID, log);
557 if (log != NULL)
558 rf_ReleaseParityLogs(raidPtr, log);
559 }
560 }
561 if (rf_parityLogDebug)
562 {
563 printf("[parity logging disabled]\n");
564 printf("[should be done!]\n");
565 }
566 return(0);
567 }
568
569 int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
570 {
571 RF_ParityLog_t *reintQueue, *flushQueue;
572 int workNeeded, done = RF_FALSE;
573
574 rf_assign_threadid(); /* don't remove this line */
575
576 /* Main program for parity logging disk thread. This routine waits
577 for work to appear in either the flush or reintegration queues
578 and is responsible for flushing core logs to the log disk as
579 well as reintegrating parity regions.
580
581 BLOCKING
582 */
583
584 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
585
586 /*
587 * Inform our creator that we're running. Don't bother doing the
588 * mutex lock/unlock dance- we locked above, and we'll unlock
589 * below with nothing to do, yet.
590 */
591 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
592 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
593
594 /* empty the work queues */
595 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
596 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
597 workNeeded = (flushQueue || reintQueue);
598
599 while (!done)
600 {
601 while (workNeeded)
602 {
603 /* First, flush all logs in the flush queue, freeing buffers
604 Second, reintegrate all regions which are reported as full.
605 Third, append queued log data until blocked.
606
607 Note: Incoming appends (ParityLogAppend) can block on either
608 1. empty buffer pool
609 2. region under reintegration
610 To preserve a global FIFO ordering of appends, buffers are not
611 released to the world until those appends blocked on buffers are
612 removed from the append queue. Similarly, regions which are
613 reintegrated are not opened for general use until the append
614 queue has been emptied.
615 */
616
617 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
618
619 /* empty flushQueue, using free'd log buffers to process bufTail */
620 if (flushQueue)
621 FlushLogsToDisk(raidPtr, flushQueue);
622
623 /* empty reintQueue, flushing from reintTail as we go */
624 if (reintQueue)
625 ReintegrateLogs(raidPtr, reintQueue);
626
627 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
628 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
629 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
630 workNeeded = (flushQueue || reintQueue);
631 }
632 /* no work is needed at this point */
633 if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE)
634 {
635 /* shutdown parity logging
636 1. disable parity logging in all regions
637 2. reintegrate all regions
638 */
639 done = RF_TRUE; /* thread disabled, no work needed */
640 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
641 rf_ShutdownLogging(raidPtr);
642 }
643 if (!done)
644 {
645 /* thread enabled, no work needed, so sleep */
646 if (rf_parityLogDebug)
647 printf("[parity logging disk manager sleeping]\n");
648 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
649 if (rf_parityLogDebug)
650 printf("[parity logging disk manager just woke up]\n");
651 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
652 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
653 workNeeded = (flushQueue || reintQueue);
654 }
655 }
656 /*
657 * Announce that we're done.
658 */
659 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
660 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
661 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
662 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
663 #if defined(__NetBSD__) && defined(_KERNEL)
664 /*
665 * In the NetBSD kernel, the thread must exit; returning would
666 * cause the proc trampoline to attempt to return to userspace.
667 */
668 kthread_exit(0); /* does not return */
669 #else
670 return(0);
671 #endif
672 }
673
674 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
675