rf_paritylogDiskMgr.c revision 1.5 1 /* $NetBSD: rf_paritylogDiskMgr.c,v 1.5 2000/01/07 03:25:35 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28 /* Code for flushing and reintegration operations related to parity logging.
29 *
30 */
31
32 #include "rf_archs.h"
33
34 #if RF_INCLUDE_PARITYLOGGING > 0
35
36 #include "rf_types.h"
37 #include "rf_threadstuff.h"
38 #include "rf_mcpair.h"
39 #include "rf_raid.h"
40 #include "rf_dag.h"
41 #include "rf_dagfuncs.h"
42 #include "rf_desc.h"
43 #include "rf_layout.h"
44 #include "rf_diskqueue.h"
45 #include "rf_paritylog.h"
46 #include "rf_general.h"
47 #include "rf_threadid.h"
48 #include "rf_etimer.h"
49 #include "rf_paritylogging.h"
50 #include "rf_engine.h"
51 #include "rf_dagutils.h"
52 #include "rf_map.h"
53 #include "rf_parityscan.h"
54
55 #include "rf_paritylogDiskMgr.h"
56
57 static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
58
59 static caddr_t
60 AcquireReintBuffer(pool)
61 RF_RegionBufferQueue_t *pool;
62 {
63 caddr_t bufPtr = NULL;
64
65 /* Return a region buffer from the free list (pool). If the free list
66 * is empty, WAIT. BLOCKING */
67
68 RF_LOCK_MUTEX(pool->mutex);
69 if (pool->availableBuffers > 0) {
70 bufPtr = pool->buffers[pool->availBuffersIndex];
71 pool->availableBuffers--;
72 pool->availBuffersIndex++;
73 if (pool->availBuffersIndex == pool->totalBuffers)
74 pool->availBuffersIndex = 0;
75 RF_UNLOCK_MUTEX(pool->mutex);
76 } else {
77 RF_PANIC(); /* should never happen in currect config,
78 * single reint */
79 RF_WAIT_COND(pool->cond, pool->mutex);
80 }
81 return (bufPtr);
82 }
83
84 static void
85 ReleaseReintBuffer(
86 RF_RegionBufferQueue_t * pool,
87 caddr_t bufPtr)
88 {
89 /* Insert a region buffer (bufPtr) into the free list (pool).
90 * NON-BLOCKING */
91
92 RF_LOCK_MUTEX(pool->mutex);
93 pool->availableBuffers++;
94 pool->buffers[pool->emptyBuffersIndex] = bufPtr;
95 pool->emptyBuffersIndex++;
96 if (pool->emptyBuffersIndex == pool->totalBuffers)
97 pool->emptyBuffersIndex = 0;
98 RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
99 RF_UNLOCK_MUTEX(pool->mutex);
100 RF_SIGNAL_COND(pool->cond);
101 }
102
103
104
105 static void
106 ReadRegionLog(
107 RF_RegionId_t regionID,
108 RF_MCPair_t * rrd_mcpair,
109 caddr_t regionBuffer,
110 RF_Raid_t * raidPtr,
111 RF_DagHeader_t ** rrd_dag_h,
112 RF_AllocListElem_t ** rrd_alloclist,
113 RF_PhysDiskAddr_t ** rrd_pda)
114 {
115 /* Initiate the read a region log from disk. Once initiated, return
116 * to the calling routine.
117 *
118 * NON-BLOCKING */
119
120 RF_AccTraceEntry_t tracerec;
121 RF_DagNode_t *rrd_rdNode;
122
123 /* create DAG to read region log from disk */
124 rf_MakeAllocList(*rrd_alloclist);
125 *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
126 "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
127
128 /* create and initialize PDA for the core log */
129 /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
130 * *)); */
131 *rrd_pda = rf_AllocPDAList(1);
132 rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
133 (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
134
135 if ((*rrd_pda)->next) {
136 (*rrd_pda)->next = NULL;
137 printf("set rrd_pda->next to NULL\n");
138 }
139 /* initialize DAG parameters */
140 bzero((char *) &tracerec, sizeof(tracerec));
141 (*rrd_dag_h)->tracerec = &tracerec;
142 rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
143 rrd_rdNode->params[0].p = *rrd_pda;
144 /* rrd_rdNode->params[1] = regionBuffer; */
145 rrd_rdNode->params[2].v = 0;
146 rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
147
148 /* launch region log read dag */
149 rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
150 (void *) rrd_mcpair);
151 }
152
153
154
155 static void
156 WriteCoreLog(
157 RF_ParityLog_t * log,
158 RF_MCPair_t * fwr_mcpair,
159 RF_Raid_t * raidPtr,
160 RF_DagHeader_t ** fwr_dag_h,
161 RF_AllocListElem_t ** fwr_alloclist,
162 RF_PhysDiskAddr_t ** fwr_pda)
163 {
164 RF_RegionId_t regionID = log->regionID;
165 RF_AccTraceEntry_t tracerec;
166 RF_SectorNum_t regionOffset;
167 RF_DagNode_t *fwr_wrNode;
168
169 /* Initiate the write of a core log to a region log disk. Once
170 * initiated, return to the calling routine.
171 *
172 * NON-BLOCKING */
173
174 /* create DAG to write a core log to a region log disk */
175 rf_MakeAllocList(*fwr_alloclist);
176 *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
177 "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
178
179 /* create and initialize PDA for the region log */
180 /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
181 * *)); */
182 *fwr_pda = rf_AllocPDAList(1);
183 regionOffset = log->diskOffset;
184 rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
185 (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
186
187 /* initialize DAG parameters */
188 bzero((char *) &tracerec, sizeof(tracerec));
189 (*fwr_dag_h)->tracerec = &tracerec;
190 fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
191 fwr_wrNode->params[0].p = *fwr_pda;
192 /* fwr_wrNode->params[1] = log->bufPtr; */
193 fwr_wrNode->params[2].v = 0;
194 fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
195
196 /* launch the dag to write the core log to disk */
197 rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
198 (void *) fwr_mcpair);
199 }
200
201
202 static void
203 ReadRegionParity(
204 RF_RegionId_t regionID,
205 RF_MCPair_t * prd_mcpair,
206 caddr_t parityBuffer,
207 RF_Raid_t * raidPtr,
208 RF_DagHeader_t ** prd_dag_h,
209 RF_AllocListElem_t ** prd_alloclist,
210 RF_PhysDiskAddr_t ** prd_pda)
211 {
212 /* Initiate the read region parity from disk. Once initiated, return
213 * to the calling routine.
214 *
215 * NON-BLOCKING */
216
217 RF_AccTraceEntry_t tracerec;
218 RF_DagNode_t *prd_rdNode;
219
220 /* create DAG to read region parity from disk */
221 rf_MakeAllocList(*prd_alloclist);
222 *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
223 "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
224
225 /* create and initialize PDA for region parity */
226 /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
227 * *)); */
228 *prd_pda = rf_AllocPDAList(1);
229 rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
230 if (rf_parityLogDebug)
231 printf("[reading %d sectors of parity from region %d]\n",
232 (int) (*prd_pda)->numSector, regionID);
233 if ((*prd_pda)->next) {
234 (*prd_pda)->next = NULL;
235 printf("set prd_pda->next to NULL\n");
236 }
237 /* initialize DAG parameters */
238 bzero((char *) &tracerec, sizeof(tracerec));
239 (*prd_dag_h)->tracerec = &tracerec;
240 prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
241 prd_rdNode->params[0].p = *prd_pda;
242 prd_rdNode->params[1].p = parityBuffer;
243 prd_rdNode->params[2].v = 0;
244 prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
245 if (rf_validateDAGDebug)
246 rf_ValidateDAG(*prd_dag_h);
247 /* launch region parity read dag */
248 rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
249 (void *) prd_mcpair);
250 }
251
252 static void
253 WriteRegionParity(
254 RF_RegionId_t regionID,
255 RF_MCPair_t * pwr_mcpair,
256 caddr_t parityBuffer,
257 RF_Raid_t * raidPtr,
258 RF_DagHeader_t ** pwr_dag_h,
259 RF_AllocListElem_t ** pwr_alloclist,
260 RF_PhysDiskAddr_t ** pwr_pda)
261 {
262 /* Initiate the write of region parity to disk. Once initiated, return
263 * to the calling routine.
264 *
265 * NON-BLOCKING */
266
267 RF_AccTraceEntry_t tracerec;
268 RF_DagNode_t *pwr_wrNode;
269
270 /* create DAG to write region log from disk */
271 rf_MakeAllocList(*pwr_alloclist);
272 *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
273 "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
274
275 /* create and initialize PDA for region parity */
276 /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
277 * *)); */
278 *pwr_pda = rf_AllocPDAList(1);
279 rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
280
281 /* initialize DAG parameters */
282 bzero((char *) &tracerec, sizeof(tracerec));
283 (*pwr_dag_h)->tracerec = &tracerec;
284 pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
285 pwr_wrNode->params[0].p = *pwr_pda;
286 /* pwr_wrNode->params[1] = parityBuffer; */
287 pwr_wrNode->params[2].v = 0;
288 pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
289
290 /* launch the dag to write region parity to disk */
291 rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
292 (void *) pwr_mcpair);
293 }
294
295 static void
296 FlushLogsToDisk(
297 RF_Raid_t * raidPtr,
298 RF_ParityLog_t * logList)
299 {
300 /* Flush a linked list of core logs to the log disk. Logs contain the
301 * disk location where they should be written. Logs were written in
302 * FIFO order and that order must be preserved.
303 *
304 * Recommended optimizations: 1) allow multiple flushes to occur
305 * simultaneously 2) coalesce contiguous flush operations
306 *
307 * BLOCKING */
308
309 RF_ParityLog_t *log;
310 RF_RegionId_t regionID;
311 RF_MCPair_t *fwr_mcpair;
312 RF_DagHeader_t *fwr_dag_h;
313 RF_AllocListElem_t *fwr_alloclist;
314 RF_PhysDiskAddr_t *fwr_pda;
315
316 fwr_mcpair = rf_AllocMCPair();
317 RF_LOCK_MUTEX(fwr_mcpair->mutex);
318
319 RF_ASSERT(logList);
320 log = logList;
321 while (log) {
322 regionID = log->regionID;
323
324 /* create and launch a DAG to write the core log */
325 if (rf_parityLogDebug)
326 printf("[initiating write of core log for region %d]\n", regionID);
327 fwr_mcpair->flag = RF_FALSE;
328 WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
329
330 /* wait for the DAG to complete */
331 while (!fwr_mcpair->flag)
332 RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
333 if (fwr_dag_h->status != rf_enable) {
334 RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
335 RF_ASSERT(0);
336 }
337 /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
338 rf_FreePhysDiskAddr(fwr_pda);
339 rf_FreeDAG(fwr_dag_h);
340 rf_FreeAllocList(fwr_alloclist);
341
342 log = log->next;
343 }
344 RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
345 rf_FreeMCPair(fwr_mcpair);
346 rf_ReleaseParityLogs(raidPtr, logList);
347 }
348
349 static void
350 ReintegrateRegion(
351 RF_Raid_t * raidPtr,
352 RF_RegionId_t regionID,
353 RF_ParityLog_t * coreLog)
354 {
355 RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
356 RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
357 RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
358 RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
359 caddr_t parityBuffer, regionBuffer = NULL;
360
361 /* Reintegrate a region (regionID). 1. acquire region and parity
362 * buffers 2. read log from disk 3. read parity from disk 4. apply log
363 * to parity 5. apply core log to parity 6. write new parity to disk
364 *
365 * BLOCKING */
366
367 if (rf_parityLogDebug)
368 printf("[reintegrating region %d]\n", regionID);
369
370 /* initiate read of region parity */
371 if (rf_parityLogDebug)
372 printf("[initiating read of parity for region %d]\n", regionID);
373 parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
374 prd_mcpair = rf_AllocMCPair();
375 RF_LOCK_MUTEX(prd_mcpair->mutex);
376 prd_mcpair->flag = RF_FALSE;
377 ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
378
379 /* if region log nonempty, initiate read */
380 if (raidPtr->regionInfo[regionID].diskCount > 0) {
381 if (rf_parityLogDebug)
382 printf("[initiating read of disk log for region %d]\n", regionID);
383 regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
384 rrd_mcpair = rf_AllocMCPair();
385 RF_LOCK_MUTEX(rrd_mcpair->mutex);
386 rrd_mcpair->flag = RF_FALSE;
387 ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
388 }
389 /* wait on read of region parity to complete */
390 while (!prd_mcpair->flag) {
391 RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
392 }
393 RF_UNLOCK_MUTEX(prd_mcpair->mutex);
394 if (prd_dag_h->status != rf_enable) {
395 RF_ERRORMSG("Unable to read parity from disk\n");
396 /* add code to fail the parity disk */
397 RF_ASSERT(0);
398 }
399 /* apply core log to parity */
400 /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
401
402 if (raidPtr->regionInfo[regionID].diskCount > 0) {
403 /* wait on read of region log to complete */
404 while (!rrd_mcpair->flag)
405 RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
406 RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
407 if (rrd_dag_h->status != rf_enable) {
408 RF_ERRORMSG("Unable to read region log from disk\n");
409 /* add code to fail the log disk */
410 RF_ASSERT(0);
411 }
412 /* apply region log to parity */
413 /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
414 /* release resources associated with region log */
415 /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
416 rf_FreePhysDiskAddr(rrd_pda);
417 rf_FreeDAG(rrd_dag_h);
418 rf_FreeAllocList(rrd_alloclist);
419 rf_FreeMCPair(rrd_mcpair);
420 ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
421 }
422 /* write reintegrated parity to disk */
423 if (rf_parityLogDebug)
424 printf("[initiating write of parity for region %d]\n", regionID);
425 pwr_mcpair = rf_AllocMCPair();
426 RF_LOCK_MUTEX(pwr_mcpair->mutex);
427 pwr_mcpair->flag = RF_FALSE;
428 WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
429 while (!pwr_mcpair->flag)
430 RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
431 RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
432 if (pwr_dag_h->status != rf_enable) {
433 RF_ERRORMSG("Unable to write parity to disk\n");
434 /* add code to fail the parity disk */
435 RF_ASSERT(0);
436 }
437 /* release resources associated with read of old parity */
438 /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
439 rf_FreePhysDiskAddr(prd_pda);
440 rf_FreeDAG(prd_dag_h);
441 rf_FreeAllocList(prd_alloclist);
442 rf_FreeMCPair(prd_mcpair);
443
444 /* release resources associated with write of new parity */
445 ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
446 /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
447 rf_FreePhysDiskAddr(pwr_pda);
448 rf_FreeDAG(pwr_dag_h);
449 rf_FreeAllocList(pwr_alloclist);
450 rf_FreeMCPair(pwr_mcpair);
451
452 if (rf_parityLogDebug)
453 printf("[finished reintegrating region %d]\n", regionID);
454 }
455
456
457
458 static void
459 ReintegrateLogs(
460 RF_Raid_t * raidPtr,
461 RF_ParityLog_t * logList)
462 {
463 RF_ParityLog_t *log, *freeLogList = NULL;
464 RF_ParityLogData_t *logData, *logDataList;
465 RF_RegionId_t regionID;
466
467 RF_ASSERT(logList);
468 while (logList) {
469 log = logList;
470 logList = logList->next;
471 log->next = NULL;
472 regionID = log->regionID;
473 ReintegrateRegion(raidPtr, regionID, log);
474 log->numRecords = 0;
475
476 /* remove all items which are blocked on reintegration of this
477 * region */
478 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
479 logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
480 logDataList = logData;
481 while (logData) {
482 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
483 logData = logData->next;
484 }
485 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
486
487 /* process blocked log data and clear reintInProgress flag for
488 * this region */
489 if (logDataList)
490 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
491 else {
492 /* Enable flushing for this region. Holding both
493 * locks provides a synchronization barrier with
494 * DumpParityLogToDisk */
495 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
496 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
497 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
498 raidPtr->regionInfo[regionID].diskCount = 0;
499 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
500 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
501 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
502 * enabled */
503 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
504 }
505 /* if log wasn't used, attach it to the list of logs to be
506 * returned */
507 if (log) {
508 log->next = freeLogList;
509 freeLogList = log;
510 }
511 }
512 if (freeLogList)
513 rf_ReleaseParityLogs(raidPtr, freeLogList);
514 }
515
516 int
517 rf_ShutdownLogging(RF_Raid_t * raidPtr)
518 {
519 /* shutdown parity logging 1) disable parity logging in all regions 2)
520 * reintegrate all regions */
521
522 RF_SectorCount_t diskCount;
523 RF_RegionId_t regionID;
524 RF_ParityLog_t *log;
525
526 if (rf_parityLogDebug)
527 printf("[shutting down parity logging]\n");
528 /* Since parity log maps are volatile, we must reintegrate all
529 * regions. */
530 if (rf_forceParityLogReint) {
531 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
532 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
533 raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
534 log = raidPtr->regionInfo[regionID].coreLog;
535 raidPtr->regionInfo[regionID].coreLog = NULL;
536 diskCount = raidPtr->regionInfo[regionID].diskCount;
537 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
538 if (diskCount > 0 || log != NULL)
539 ReintegrateRegion(raidPtr, regionID, log);
540 if (log != NULL)
541 rf_ReleaseParityLogs(raidPtr, log);
542 }
543 }
544 if (rf_parityLogDebug) {
545 printf("[parity logging disabled]\n");
546 printf("[should be done!]\n");
547 }
548 return (0);
549 }
550
551 int
552 rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr)
553 {
554 RF_ParityLog_t *reintQueue, *flushQueue;
555 int workNeeded, done = RF_FALSE;
556
557 /* Main program for parity logging disk thread. This routine waits
558 * for work to appear in either the flush or reintegration queues and
559 * is responsible for flushing core logs to the log disk as well as
560 * reintegrating parity regions.
561 *
562 * BLOCKING */
563
564 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
565
566 /*
567 * Inform our creator that we're running. Don't bother doing the
568 * mutex lock/unlock dance- we locked above, and we'll unlock
569 * below with nothing to do, yet.
570 */
571 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
572 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
573
574 /* empty the work queues */
575 flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
576 raidPtr->parityLogDiskQueue.flushQueue = NULL;
577 reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
578 raidPtr->parityLogDiskQueue.reintQueue = NULL;
579 workNeeded = (flushQueue || reintQueue);
580
581 while (!done) {
582 while (workNeeded) {
583 /* First, flush all logs in the flush queue, freeing
584 * buffers Second, reintegrate all regions which are
585 * reported as full. Third, append queued log data
586 * until blocked.
587 *
588 * Note: Incoming appends (ParityLogAppend) can block on
589 * either 1. empty buffer pool 2. region under
590 * reintegration To preserve a global FIFO ordering of
591 * appends, buffers are not released to the world
592 * until those appends blocked on buffers are removed
593 * from the append queue. Similarly, regions which
594 * are reintegrated are not opened for general use
595 * until the append queue has been emptied. */
596
597 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
598
599 /* empty flushQueue, using free'd log buffers to
600 * process bufTail */
601 if (flushQueue)
602 FlushLogsToDisk(raidPtr, flushQueue);
603
604 /* empty reintQueue, flushing from reintTail as we go */
605 if (reintQueue)
606 ReintegrateLogs(raidPtr, reintQueue);
607
608 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
609 flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
610 raidPtr->parityLogDiskQueue.flushQueue = NULL;
611 reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
612 raidPtr->parityLogDiskQueue.reintQueue = NULL;
613 workNeeded = (flushQueue || reintQueue);
614 }
615 /* no work is needed at this point */
616 if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
617 /* shutdown parity logging 1. disable parity logging
618 * in all regions 2. reintegrate all regions */
619 done = RF_TRUE; /* thread disabled, no work needed */
620 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
621 rf_ShutdownLogging(raidPtr);
622 }
623 if (!done) {
624 /* thread enabled, no work needed, so sleep */
625 if (rf_parityLogDebug)
626 printf("[parity logging disk manager sleeping]\n");
627 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
628 if (rf_parityLogDebug)
629 printf("[parity logging disk manager just woke up]\n");
630 flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
631 raidPtr->parityLogDiskQueue.flushQueue = NULL;
632 reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
633 raidPtr->parityLogDiskQueue.reintQueue = NULL;
634 workNeeded = (flushQueue || reintQueue);
635 }
636 }
637 /*
638 * Announce that we're done.
639 */
640 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
641 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
642 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
643 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
644 #if defined(__NetBSD__) && defined(_KERNEL)
645 /*
646 * In the NetBSD kernel, the thread must exit; returning would
647 * cause the proc trampoline to attempt to return to userspace.
648 */
649 kthread_exit(0); /* does not return */
650 #else
651 return (0);
652 #endif
653 }
654 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
655