rf_paritylogDiskMgr.c revision 1.14 1 /* $NetBSD: rf_paritylogDiskMgr.c,v 1.14 2002/09/07 23:11:46 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28 /* Code for flushing and reintegration operations related to parity logging.
29 *
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: rf_paritylogDiskMgr.c,v 1.14 2002/09/07 23:11:46 oster Exp $");
34
35 #include "rf_archs.h"
36
37 #if RF_INCLUDE_PARITYLOGGING > 0
38
39 #include <dev/raidframe/raidframevar.h>
40
41 #include "rf_threadstuff.h"
42 #include "rf_mcpair.h"
43 #include "rf_raid.h"
44 #include "rf_dag.h"
45 #include "rf_dagfuncs.h"
46 #include "rf_desc.h"
47 #include "rf_layout.h"
48 #include "rf_diskqueue.h"
49 #include "rf_paritylog.h"
50 #include "rf_general.h"
51 #include "rf_etimer.h"
52 #include "rf_paritylogging.h"
53 #include "rf_engine.h"
54 #include "rf_dagutils.h"
55 #include "rf_map.h"
56 #include "rf_parityscan.h"
57
58 #include "rf_paritylogDiskMgr.h"
59
60 static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
61
62 static caddr_t
63 AcquireReintBuffer(pool)
64 RF_RegionBufferQueue_t *pool;
65 {
66 caddr_t bufPtr = NULL;
67
68 /* Return a region buffer from the free list (pool). If the free list
69 * is empty, WAIT. BLOCKING */
70
71 RF_LOCK_MUTEX(pool->mutex);
72 if (pool->availableBuffers > 0) {
73 bufPtr = pool->buffers[pool->availBuffersIndex];
74 pool->availableBuffers--;
75 pool->availBuffersIndex++;
76 if (pool->availBuffersIndex == pool->totalBuffers)
77 pool->availBuffersIndex = 0;
78 RF_UNLOCK_MUTEX(pool->mutex);
79 } else {
80 RF_PANIC(); /* should never happen in correct config,
81 * single reint */
82 RF_WAIT_COND(pool->cond, pool->mutex);
83 }
84 return (bufPtr);
85 }
86
87 static void
88 ReleaseReintBuffer(
89 RF_RegionBufferQueue_t * pool,
90 caddr_t bufPtr)
91 {
92 /* Insert a region buffer (bufPtr) into the free list (pool).
93 * NON-BLOCKING */
94
95 RF_LOCK_MUTEX(pool->mutex);
96 pool->availableBuffers++;
97 pool->buffers[pool->emptyBuffersIndex] = bufPtr;
98 pool->emptyBuffersIndex++;
99 if (pool->emptyBuffersIndex == pool->totalBuffers)
100 pool->emptyBuffersIndex = 0;
101 RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
102 RF_UNLOCK_MUTEX(pool->mutex);
103 RF_SIGNAL_COND(pool->cond);
104 }
105
106
107
108 static void
109 ReadRegionLog(
110 RF_RegionId_t regionID,
111 RF_MCPair_t * rrd_mcpair,
112 caddr_t regionBuffer,
113 RF_Raid_t * raidPtr,
114 RF_DagHeader_t ** rrd_dag_h,
115 RF_AllocListElem_t ** rrd_alloclist,
116 RF_PhysDiskAddr_t ** rrd_pda)
117 {
118 /* Initiate the read a region log from disk. Once initiated, return
119 * to the calling routine.
120 *
121 * NON-BLOCKING */
122
123 RF_AccTraceEntry_t *tracerec;
124 RF_DagNode_t *rrd_rdNode;
125
126 /* create DAG to read region log from disk */
127 rf_MakeAllocList(*rrd_alloclist);
128 *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer,
129 rf_DiskReadFunc, rf_DiskReadUndoFunc,
130 "Rrl", *rrd_alloclist,
131 RF_DAG_FLAGS_NONE,
132 RF_IO_NORMAL_PRIORITY);
133
134 /* create and initialize PDA for the core log */
135 /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
136 * *)); */
137 *rrd_pda = rf_AllocPDAList(1);
138 rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row),
139 &((*rrd_pda)->col), &((*rrd_pda)->startSector));
140 (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
141
142 if ((*rrd_pda)->next) {
143 (*rrd_pda)->next = NULL;
144 printf("set rrd_pda->next to NULL\n");
145 }
146 /* initialize DAG parameters */
147 RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
148 memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t));
149 (*rrd_dag_h)->tracerec = tracerec;
150 rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
151 rrd_rdNode->params[0].p = *rrd_pda;
152 /* rrd_rdNode->params[1] = regionBuffer; */
153 rrd_rdNode->params[2].v = 0;
154 rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
155 0, 0, 0);
156
157 /* launch region log read dag */
158 rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
159 (void *) rrd_mcpair);
160 }
161
162
163
164 static void
165 WriteCoreLog(
166 RF_ParityLog_t * log,
167 RF_MCPair_t * fwr_mcpair,
168 RF_Raid_t * raidPtr,
169 RF_DagHeader_t ** fwr_dag_h,
170 RF_AllocListElem_t ** fwr_alloclist,
171 RF_PhysDiskAddr_t ** fwr_pda)
172 {
173 RF_RegionId_t regionID = log->regionID;
174 RF_AccTraceEntry_t *tracerec;
175 RF_SectorNum_t regionOffset;
176 RF_DagNode_t *fwr_wrNode;
177
178 /* Initiate the write of a core log to a region log disk. Once
179 * initiated, return to the calling routine.
180 *
181 * NON-BLOCKING */
182
183 /* create DAG to write a core log to a region log disk */
184 rf_MakeAllocList(*fwr_alloclist);
185 *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr,
186 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
187 "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
188
189 /* create and initialize PDA for the region log */
190 /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
191 * *)); */
192 *fwr_pda = rf_AllocPDAList(1);
193 regionOffset = log->diskOffset;
194 rf_MapLogParityLogging(raidPtr, regionID, regionOffset,
195 &((*fwr_pda)->row), &((*fwr_pda)->col),
196 &((*fwr_pda)->startSector));
197 (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
198
199 /* initialize DAG parameters */
200 RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
201 memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t));
202 (*fwr_dag_h)->tracerec = tracerec;
203 fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
204 fwr_wrNode->params[0].p = *fwr_pda;
205 /* fwr_wrNode->params[1] = log->bufPtr; */
206 fwr_wrNode->params[2].v = 0;
207 fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
208 0, 0, 0);
209
210 /* launch the dag to write the core log to disk */
211 rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
212 (void *) fwr_mcpair);
213 }
214
215
216 static void
217 ReadRegionParity(
218 RF_RegionId_t regionID,
219 RF_MCPair_t * prd_mcpair,
220 caddr_t parityBuffer,
221 RF_Raid_t * raidPtr,
222 RF_DagHeader_t ** prd_dag_h,
223 RF_AllocListElem_t ** prd_alloclist,
224 RF_PhysDiskAddr_t ** prd_pda)
225 {
226 /* Initiate the read region parity from disk. Once initiated, return
227 * to the calling routine.
228 *
229 * NON-BLOCKING */
230
231 RF_AccTraceEntry_t *tracerec;
232 RF_DagNode_t *prd_rdNode;
233
234 /* create DAG to read region parity from disk */
235 rf_MakeAllocList(*prd_alloclist);
236 *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc,
237 rf_DiskReadUndoFunc, "Rrp",
238 *prd_alloclist, RF_DAG_FLAGS_NONE,
239 RF_IO_NORMAL_PRIORITY);
240
241 /* create and initialize PDA for region parity */
242 /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
243 * *)); */
244 *prd_pda = rf_AllocPDAList(1);
245 rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row),
246 &((*prd_pda)->col), &((*prd_pda)->startSector),
247 &((*prd_pda)->numSector));
248 if (rf_parityLogDebug)
249 printf("[reading %d sectors of parity from region %d]\n",
250 (int) (*prd_pda)->numSector, regionID);
251 if ((*prd_pda)->next) {
252 (*prd_pda)->next = NULL;
253 printf("set prd_pda->next to NULL\n");
254 }
255 /* initialize DAG parameters */
256 RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
257 memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t));
258 (*prd_dag_h)->tracerec = tracerec;
259 prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
260 prd_rdNode->params[0].p = *prd_pda;
261 prd_rdNode->params[1].p = parityBuffer;
262 prd_rdNode->params[2].v = 0;
263 prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
264 0, 0, 0);
265 #if RF_DEBUG_VALIDATE_DAG
266 if (rf_validateDAGDebug)
267 rf_ValidateDAG(*prd_dag_h);
268 #endif
269 /* launch region parity read dag */
270 rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
271 (void *) prd_mcpair);
272 }
273
274 static void
275 WriteRegionParity(
276 RF_RegionId_t regionID,
277 RF_MCPair_t * pwr_mcpair,
278 caddr_t parityBuffer,
279 RF_Raid_t * raidPtr,
280 RF_DagHeader_t ** pwr_dag_h,
281 RF_AllocListElem_t ** pwr_alloclist,
282 RF_PhysDiskAddr_t ** pwr_pda)
283 {
284 /* Initiate the write of region parity to disk. Once initiated, return
285 * to the calling routine.
286 *
287 * NON-BLOCKING */
288
289 RF_AccTraceEntry_t *tracerec;
290 RF_DagNode_t *pwr_wrNode;
291
292 /* create DAG to write region log from disk */
293 rf_MakeAllocList(*pwr_alloclist);
294 *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer,
295 rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
296 "Wrp", *pwr_alloclist,
297 RF_DAG_FLAGS_NONE,
298 RF_IO_NORMAL_PRIORITY);
299
300 /* create and initialize PDA for region parity */
301 /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t
302 * *)); */
303 *pwr_pda = rf_AllocPDAList(1);
304 rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row),
305 &((*pwr_pda)->col), &((*pwr_pda)->startSector),
306 &((*pwr_pda)->numSector));
307
308 /* initialize DAG parameters */
309 RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
310 memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t));
311 (*pwr_dag_h)->tracerec = tracerec;
312 pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
313 pwr_wrNode->params[0].p = *pwr_pda;
314 /* pwr_wrNode->params[1] = parityBuffer; */
315 pwr_wrNode->params[2].v = 0;
316 pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
317 0, 0, 0);
318
319 /* launch the dag to write region parity to disk */
320 rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
321 (void *) pwr_mcpair);
322 }
323
324 static void
325 FlushLogsToDisk(
326 RF_Raid_t * raidPtr,
327 RF_ParityLog_t * logList)
328 {
329 /* Flush a linked list of core logs to the log disk. Logs contain the
330 * disk location where they should be written. Logs were written in
331 * FIFO order and that order must be preserved.
332 *
333 * Recommended optimizations: 1) allow multiple flushes to occur
334 * simultaneously 2) coalesce contiguous flush operations
335 *
336 * BLOCKING */
337
338 RF_ParityLog_t *log;
339 RF_RegionId_t regionID;
340 RF_MCPair_t *fwr_mcpair;
341 RF_DagHeader_t *fwr_dag_h;
342 RF_AllocListElem_t *fwr_alloclist;
343 RF_PhysDiskAddr_t *fwr_pda;
344
345 fwr_mcpair = rf_AllocMCPair();
346 RF_LOCK_MUTEX(fwr_mcpair->mutex);
347
348 RF_ASSERT(logList);
349 log = logList;
350 while (log) {
351 regionID = log->regionID;
352
353 /* create and launch a DAG to write the core log */
354 if (rf_parityLogDebug)
355 printf("[initiating write of core log for region %d]\n", regionID);
356 fwr_mcpair->flag = RF_FALSE;
357 WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h,
358 &fwr_alloclist, &fwr_pda);
359
360 /* wait for the DAG to complete */
361 while (!fwr_mcpair->flag)
362 RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
363 if (fwr_dag_h->status != rf_enable) {
364 RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
365 RF_ASSERT(0);
366 }
367 /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
368 rf_FreePhysDiskAddr(fwr_pda);
369 rf_FreeDAG(fwr_dag_h);
370 rf_FreeAllocList(fwr_alloclist);
371
372 log = log->next;
373 }
374 RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
375 rf_FreeMCPair(fwr_mcpair);
376 rf_ReleaseParityLogs(raidPtr, logList);
377 }
378
379 static void
380 ReintegrateRegion(
381 RF_Raid_t * raidPtr,
382 RF_RegionId_t regionID,
383 RF_ParityLog_t * coreLog)
384 {
385 RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
386 RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
387 RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
388 RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
389 caddr_t parityBuffer, regionBuffer = NULL;
390
391 /* Reintegrate a region (regionID).
392 *
393 * 1. acquire region and parity buffers
394 * 2. read log from disk
395 * 3. read parity from disk
396 * 4. apply log to parity
397 * 5. apply core log to parity
398 * 6. write new parity to disk
399 *
400 * BLOCKING */
401
402 if (rf_parityLogDebug)
403 printf("[reintegrating region %d]\n", regionID);
404
405 /* initiate read of region parity */
406 if (rf_parityLogDebug)
407 printf("[initiating read of parity for region %d]\n",regionID);
408 parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
409 prd_mcpair = rf_AllocMCPair();
410 RF_LOCK_MUTEX(prd_mcpair->mutex);
411 prd_mcpair->flag = RF_FALSE;
412 ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr,
413 &prd_dag_h, &prd_alloclist, &prd_pda);
414
415 /* if region log nonempty, initiate read */
416 if (raidPtr->regionInfo[regionID].diskCount > 0) {
417 if (rf_parityLogDebug)
418 printf("[initiating read of disk log for region %d]\n",
419 regionID);
420 regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
421 rrd_mcpair = rf_AllocMCPair();
422 RF_LOCK_MUTEX(rrd_mcpair->mutex);
423 rrd_mcpair->flag = RF_FALSE;
424 ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr,
425 &rrd_dag_h, &rrd_alloclist, &rrd_pda);
426 }
427 /* wait on read of region parity to complete */
428 while (!prd_mcpair->flag) {
429 RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
430 }
431 RF_UNLOCK_MUTEX(prd_mcpair->mutex);
432 if (prd_dag_h->status != rf_enable) {
433 RF_ERRORMSG("Unable to read parity from disk\n");
434 /* add code to fail the parity disk */
435 RF_ASSERT(0);
436 }
437 /* apply core log to parity */
438 /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
439
440 if (raidPtr->regionInfo[regionID].diskCount > 0) {
441 /* wait on read of region log to complete */
442 while (!rrd_mcpair->flag)
443 RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
444 RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
445 if (rrd_dag_h->status != rf_enable) {
446 RF_ERRORMSG("Unable to read region log from disk\n");
447 /* add code to fail the log disk */
448 RF_ASSERT(0);
449 }
450 /* apply region log to parity */
451 /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
452 /* release resources associated with region log */
453 /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
454 rf_FreePhysDiskAddr(rrd_pda);
455 rf_FreeDAG(rrd_dag_h);
456 rf_FreeAllocList(rrd_alloclist);
457 rf_FreeMCPair(rrd_mcpair);
458 ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
459 }
460 /* write reintegrated parity to disk */
461 if (rf_parityLogDebug)
462 printf("[initiating write of parity for region %d]\n",
463 regionID);
464 pwr_mcpair = rf_AllocMCPair();
465 RF_LOCK_MUTEX(pwr_mcpair->mutex);
466 pwr_mcpair->flag = RF_FALSE;
467 WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr,
468 &pwr_dag_h, &pwr_alloclist, &pwr_pda);
469 while (!pwr_mcpair->flag)
470 RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
471 RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
472 if (pwr_dag_h->status != rf_enable) {
473 RF_ERRORMSG("Unable to write parity to disk\n");
474 /* add code to fail the parity disk */
475 RF_ASSERT(0);
476 }
477 /* release resources associated with read of old parity */
478 /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
479 rf_FreePhysDiskAddr(prd_pda);
480 rf_FreeDAG(prd_dag_h);
481 rf_FreeAllocList(prd_alloclist);
482 rf_FreeMCPair(prd_mcpair);
483
484 /* release resources associated with write of new parity */
485 ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
486 /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
487 rf_FreePhysDiskAddr(pwr_pda);
488 rf_FreeDAG(pwr_dag_h);
489 rf_FreeAllocList(pwr_alloclist);
490 rf_FreeMCPair(pwr_mcpair);
491
492 if (rf_parityLogDebug)
493 printf("[finished reintegrating region %d]\n", regionID);
494 }
495
496
497
498 static void
499 ReintegrateLogs(
500 RF_Raid_t * raidPtr,
501 RF_ParityLog_t * logList)
502 {
503 RF_ParityLog_t *log, *freeLogList = NULL;
504 RF_ParityLogData_t *logData, *logDataList;
505 RF_RegionId_t regionID;
506
507 RF_ASSERT(logList);
508 while (logList) {
509 log = logList;
510 logList = logList->next;
511 log->next = NULL;
512 regionID = log->regionID;
513 ReintegrateRegion(raidPtr, regionID, log);
514 log->numRecords = 0;
515
516 /* remove all items which are blocked on reintegration of this
517 * region */
518 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
519 logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID,
520 &raidPtr->parityLogDiskQueue.reintBlockHead,
521 &raidPtr->parityLogDiskQueue.reintBlockTail,
522 RF_TRUE);
523 logDataList = logData;
524 while (logData) {
525 logData->next = rf_SearchAndDequeueParityLogData(
526 raidPtr, regionID,
527 &raidPtr->parityLogDiskQueue.reintBlockHead,
528 &raidPtr->parityLogDiskQueue.reintBlockTail,
529 RF_TRUE);
530 logData = logData->next;
531 }
532 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
533
534 /* process blocked log data and clear reintInProgress flag for
535 * this region */
536 if (logDataList)
537 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
538 else {
539 /* Enable flushing for this region. Holding both
540 * locks provides a synchronization barrier with
541 * DumpParityLogToDisk */
542 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
543 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
544 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
545 raidPtr->regionInfo[regionID].diskCount = 0;
546 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
547 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
548 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
549 * enabled */
550 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
551 }
552 /* if log wasn't used, attach it to the list of logs to be
553 * returned */
554 if (log) {
555 log->next = freeLogList;
556 freeLogList = log;
557 }
558 }
559 if (freeLogList)
560 rf_ReleaseParityLogs(raidPtr, freeLogList);
561 }
562
563 int
564 rf_ShutdownLogging(RF_Raid_t * raidPtr)
565 {
566 /* shutdown parity logging 1) disable parity logging in all regions 2)
567 * reintegrate all regions */
568
569 RF_SectorCount_t diskCount;
570 RF_RegionId_t regionID;
571 RF_ParityLog_t *log;
572
573 if (rf_parityLogDebug)
574 printf("[shutting down parity logging]\n");
575 /* Since parity log maps are volatile, we must reintegrate all
576 * regions. */
577 if (rf_forceParityLogReint) {
578 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
579 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
580 raidPtr->regionInfo[regionID].loggingEnabled =
581 RF_FALSE;
582 log = raidPtr->regionInfo[regionID].coreLog;
583 raidPtr->regionInfo[regionID].coreLog = NULL;
584 diskCount = raidPtr->regionInfo[regionID].diskCount;
585 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
586 if (diskCount > 0 || log != NULL)
587 ReintegrateRegion(raidPtr, regionID, log);
588 if (log != NULL)
589 rf_ReleaseParityLogs(raidPtr, log);
590 }
591 }
592 if (rf_parityLogDebug) {
593 printf("[parity logging disabled]\n");
594 printf("[should be done!]\n");
595 }
596 return (0);
597 }
598
599 int
600 rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr)
601 {
602 RF_ParityLog_t *reintQueue, *flushQueue;
603 int workNeeded, done = RF_FALSE;
604 int s;
605
606 /* Main program for parity logging disk thread. This routine waits
607 * for work to appear in either the flush or reintegration queues and
608 * is responsible for flushing core logs to the log disk as well as
609 * reintegrating parity regions.
610 *
611 * BLOCKING */
612
613 s = splbio();
614
615 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
616
617 /*
618 * Inform our creator that we're running. Don't bother doing the
619 * mutex lock/unlock dance- we locked above, and we'll unlock
620 * below with nothing to do, yet.
621 */
622 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
623 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
624
625 /* empty the work queues */
626 flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
627 raidPtr->parityLogDiskQueue.flushQueue = NULL;
628 reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
629 raidPtr->parityLogDiskQueue.reintQueue = NULL;
630 workNeeded = (flushQueue || reintQueue);
631
632 while (!done) {
633 while (workNeeded) {
634 /* First, flush all logs in the flush queue, freeing
635 * buffers Second, reintegrate all regions which are
636 * reported as full. Third, append queued log data
637 * until blocked.
638 *
639 * Note: Incoming appends (ParityLogAppend) can block on
640 * either 1. empty buffer pool 2. region under
641 * reintegration To preserve a global FIFO ordering of
642 * appends, buffers are not released to the world
643 * until those appends blocked on buffers are removed
644 * from the append queue. Similarly, regions which
645 * are reintegrated are not opened for general use
646 * until the append queue has been emptied. */
647
648 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
649
650 /* empty flushQueue, using free'd log buffers to
651 * process bufTail */
652 if (flushQueue)
653 FlushLogsToDisk(raidPtr, flushQueue);
654
655 /* empty reintQueue, flushing from reintTail as we go */
656 if (reintQueue)
657 ReintegrateLogs(raidPtr, reintQueue);
658
659 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
660 flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
661 raidPtr->parityLogDiskQueue.flushQueue = NULL;
662 reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
663 raidPtr->parityLogDiskQueue.reintQueue = NULL;
664 workNeeded = (flushQueue || reintQueue);
665 }
666 /* no work is needed at this point */
667 if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
668 /* shutdown parity logging 1. disable parity logging
669 * in all regions 2. reintegrate all regions */
670 done = RF_TRUE; /* thread disabled, no work needed */
671 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
672 rf_ShutdownLogging(raidPtr);
673 }
674 if (!done) {
675 /* thread enabled, no work needed, so sleep */
676 if (rf_parityLogDebug)
677 printf("[parity logging disk manager sleeping]\n");
678 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
679 raidPtr->parityLogDiskQueue.mutex);
680 if (rf_parityLogDebug)
681 printf("[parity logging disk manager just woke up]\n");
682 flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
683 raidPtr->parityLogDiskQueue.flushQueue = NULL;
684 reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
685 raidPtr->parityLogDiskQueue.reintQueue = NULL;
686 workNeeded = (flushQueue || reintQueue);
687 }
688 }
689 /*
690 * Announce that we're done.
691 */
692 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
693 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
694 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
695 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
696
697 splx(s);
698
699 /*
700 * In the NetBSD kernel, the thread must exit; returning would
701 * cause the proc trampoline to attempt to return to userspace.
702 */
703 kthread_exit(0); /* does not return */
704 }
705 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
706