rf_paritylogDiskMgr.c revision 1.1 1 /* $NetBSD: rf_paritylogDiskMgr.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28 /* Code for flushing and reintegration operations related to parity logging.
29 *
30 * :
31 * Log: rf_paritylogDiskMgr.c,v
32 * Revision 1.25 1996/07/28 20:31:39 jimz
33 * i386netbsd port
34 * true/false fixup
35 *
36 * Revision 1.24 1996/07/27 23:36:08 jimz
37 * Solaris port of simulator
38 *
39 * Revision 1.23 1996/07/22 19:52:16 jimz
40 * switched node params to RF_DagParam_t, a union of
41 * a 64-bit int and a void *, for better portability
42 * attempted hpux port, but failed partway through for
43 * lack of a single C compiler capable of compiling all
44 * source files
45 *
46 * Revision 1.22 1996/06/11 10:17:33 jimz
47 * Put in thread startup/shutdown mechanism for proper synchronization
48 * with start and end of day routines.
49 *
50 * Revision 1.21 1996/06/09 02:36:46 jimz
51 * lots of little crufty cleanup- fixup whitespace
52 * issues, comment #ifdefs, improve typing in some
53 * places (esp size-related)
54 *
55 * Revision 1.20 1996/06/07 21:33:04 jimz
56 * begin using consistent types for sector numbers,
57 * stripe numbers, row+col numbers, recon unit numbers
58 *
59 * Revision 1.19 1996/06/05 18:06:02 jimz
60 * Major code cleanup. The Great Renaming is now done.
61 * Better modularity. Better typing. Fixed a bunch of
62 * synchronization bugs. Made a lot of global stuff
63 * per-desc or per-array. Removed dead code.
64 *
65 * Revision 1.18 1996/06/02 17:31:48 jimz
66 * Moved a lot of global stuff into array structure, where it belongs.
67 * Fixed up paritylogging, pss modules in this manner. Some general
68 * code cleanup. Removed lots of dead code, some dead files.
69 *
70 * Revision 1.17 1996/05/31 22:26:54 jimz
71 * fix a lot of mapping problems, memory allocation problems
72 * found some weird lock issues, fixed 'em
73 * more code cleanup
74 *
75 * Revision 1.16 1996/05/30 23:22:16 jimz
76 * bugfixes of serialization, timing problems
77 * more cleanup
78 *
79 * Revision 1.15 1996/05/30 12:59:18 jimz
80 * make etimer happier, more portable
81 *
82 * Revision 1.14 1996/05/30 11:29:41 jimz
83 * Numerous bug fixes. Stripe lock release code disagreed with the taking code
84 * about when stripes should be locked (I made it consistent: no parity, no lock)
85 * There was a lot of extra serialization of I/Os which I've removed- a lot of
86 * it was to calculate values for the cache code, which is no longer with us.
87 * More types, function, macro cleanup. Added code to properly quiesce the array
88 * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
89 * before. Fixed memory allocation, freeing bugs.
90 *
91 * Revision 1.13 1996/05/27 18:56:37 jimz
92 * more code cleanup
93 * better typing
94 * compiles in all 3 environments
95 *
96 * Revision 1.12 1996/05/24 22:17:04 jimz
97 * continue code + namespace cleanup
98 * typed a bunch of flags
99 *
100 * Revision 1.11 1996/05/24 04:28:55 jimz
101 * release cleanup ckpt
102 *
103 * Revision 1.10 1996/05/23 21:46:35 jimz
104 * checkpoint in code cleanup (release prep)
105 * lots of types, function names have been fixed
106 *
107 * Revision 1.9 1996/05/23 00:33:23 jimz
108 * code cleanup: move all debug decls to rf_options.c, all extern
109 * debug decls to rf_options.h, all debug vars preceded by rf_
110 *
111 * Revision 1.8 1996/05/18 19:51:34 jimz
112 * major code cleanup- fix syntax, make some types consistent,
113 * add prototypes, clean out dead code, et cetera
114 *
115 * Revision 1.7 1995/12/12 18:10:06 jimz
116 * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
117 * fix 80-column brain damage in comments
118 *
119 * Revision 1.6 1995/12/06 20:58:27 wvcii
120 * added prototypes
121 *
122 * Revision 1.5 1995/11/30 16:06:05 wvcii
123 * added copyright info
124 *
125 * Revision 1.4 1995/10/09 22:41:10 wvcii
126 * minor bug fix
127 *
128 * Revision 1.3 1995/10/08 20:43:47 wvcii
129 * lots of random debugging - debugging still incomplete
130 *
131 * Revision 1.2 1995/09/07 15:52:19 jimz
132 * noop compile when INCLUDE_PARITYLOGGING not defined
133 *
134 * Revision 1.1 1995/09/06 19:24:44 wvcii
135 * Initial revision
136 *
137 */
138
139 #include "rf_archs.h"
140
141 #if RF_INCLUDE_PARITYLOGGING > 0
142
143 #include "rf_types.h"
144 #include "rf_threadstuff.h"
145 #include "rf_mcpair.h"
146 #include "rf_raid.h"
147 #include "rf_dag.h"
148 #include "rf_dagfuncs.h"
149 #include "rf_desc.h"
150 #include "rf_layout.h"
151 #include "rf_diskqueue.h"
152 #include "rf_paritylog.h"
153 #include "rf_general.h"
154 #include "rf_threadid.h"
155 #include "rf_etimer.h"
156 #include "rf_paritylogging.h"
157 #include "rf_engine.h"
158 #include "rf_dagutils.h"
159 #include "rf_map.h"
160 #include "rf_parityscan.h"
161 #include "rf_sys.h"
162
163 #include "rf_paritylogDiskMgr.h"
164
165 static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
166
167 static caddr_t AcquireReintBuffer(pool)
168 RF_RegionBufferQueue_t *pool;
169 {
170 caddr_t bufPtr = NULL;
171
172 /* Return a region buffer from the free list (pool).
173 If the free list is empty, WAIT.
174 BLOCKING */
175
176 RF_LOCK_MUTEX(pool->mutex);
177 if (pool->availableBuffers > 0) {
178 bufPtr = pool->buffers[pool->availBuffersIndex];
179 pool->availableBuffers--;
180 pool->availBuffersIndex++;
181 if (pool->availBuffersIndex == pool->totalBuffers)
182 pool->availBuffersIndex = 0;
183 RF_UNLOCK_MUTEX(pool->mutex);
184 }
185 else {
186 RF_PANIC(); /* should never happen in currect config, single reint */
187 RF_WAIT_COND(pool->cond, pool->mutex);
188 }
189 return(bufPtr);
190 }
191
192 static void ReleaseReintBuffer(
193 RF_RegionBufferQueue_t *pool,
194 caddr_t bufPtr)
195 {
196 /* Insert a region buffer (bufPtr) into the free list (pool).
197 NON-BLOCKING */
198
199 RF_LOCK_MUTEX(pool->mutex);
200 pool->availableBuffers++;
201 pool->buffers[pool->emptyBuffersIndex] = bufPtr;
202 pool->emptyBuffersIndex++;
203 if (pool->emptyBuffersIndex == pool->totalBuffers)
204 pool->emptyBuffersIndex = 0;
205 RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
206 RF_UNLOCK_MUTEX(pool->mutex);
207 RF_SIGNAL_COND(pool->cond);
208 }
209
210
211
212 static void ReadRegionLog(
213 RF_RegionId_t regionID,
214 RF_MCPair_t *rrd_mcpair,
215 caddr_t regionBuffer,
216 RF_Raid_t *raidPtr,
217 RF_DagHeader_t **rrd_dag_h,
218 RF_AllocListElem_t **rrd_alloclist,
219 RF_PhysDiskAddr_t **rrd_pda)
220 {
221 /* Initiate the read a region log from disk. Once initiated, return
222 to the calling routine.
223
224 NON-BLOCKING
225 */
226
227 RF_AccTraceEntry_t tracerec;
228 RF_DagNode_t *rrd_rdNode;
229
230 /* create DAG to read region log from disk */
231 rf_MakeAllocList(*rrd_alloclist);
232 *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
233 "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
234
235 /* create and initialize PDA for the core log */
236 /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
237 *rrd_pda = rf_AllocPDAList(1);
238 rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
239 (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
240
241 if ((*rrd_pda)->next) {
242 (*rrd_pda)->next = NULL;
243 printf("set rrd_pda->next to NULL\n");
244 }
245
246 /* initialize DAG parameters */
247 bzero((char *)&tracerec,sizeof(tracerec));
248 (*rrd_dag_h)->tracerec = &tracerec;
249 rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
250 rrd_rdNode->params[0].p = *rrd_pda;
251 /* rrd_rdNode->params[1] = regionBuffer; */
252 rrd_rdNode->params[2].v = 0;
253 rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
254
255 /* launch region log read dag */
256 rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
257 (void *) rrd_mcpair);
258 }
259
260
261
262 static void WriteCoreLog(
263 RF_ParityLog_t *log,
264 RF_MCPair_t *fwr_mcpair,
265 RF_Raid_t *raidPtr,
266 RF_DagHeader_t **fwr_dag_h,
267 RF_AllocListElem_t **fwr_alloclist,
268 RF_PhysDiskAddr_t **fwr_pda)
269 {
270 RF_RegionId_t regionID = log->regionID;
271 RF_AccTraceEntry_t tracerec;
272 RF_SectorNum_t regionOffset;
273 RF_DagNode_t *fwr_wrNode;
274
275 /* Initiate the write of a core log to a region log disk.
276 Once initiated, return to the calling routine.
277
278 NON-BLOCKING
279 */
280
281 /* create DAG to write a core log to a region log disk */
282 rf_MakeAllocList(*fwr_alloclist);
283 *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
284 "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
285
286 /* create and initialize PDA for the region log */
287 /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
288 *fwr_pda = rf_AllocPDAList(1);
289 regionOffset = log->diskOffset;
290 rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
291 (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
292
293 /* initialize DAG parameters */
294 bzero((char *)&tracerec,sizeof(tracerec));
295 (*fwr_dag_h)->tracerec = &tracerec;
296 fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
297 fwr_wrNode->params[0].p = *fwr_pda;
298 /* fwr_wrNode->params[1] = log->bufPtr; */
299 fwr_wrNode->params[2].v = 0;
300 fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
301
302 /* launch the dag to write the core log to disk */
303 rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
304 (void *) fwr_mcpair);
305 }
306
307
308 static void ReadRegionParity(
309 RF_RegionId_t regionID,
310 RF_MCPair_t *prd_mcpair,
311 caddr_t parityBuffer,
312 RF_Raid_t *raidPtr,
313 RF_DagHeader_t **prd_dag_h,
314 RF_AllocListElem_t **prd_alloclist,
315 RF_PhysDiskAddr_t **prd_pda)
316 {
317 /* Initiate the read region parity from disk.
318 Once initiated, return to the calling routine.
319
320 NON-BLOCKING
321 */
322
323 RF_AccTraceEntry_t tracerec;
324 RF_DagNode_t *prd_rdNode;
325
326 /* create DAG to read region parity from disk */
327 rf_MakeAllocList(*prd_alloclist);
328 *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
329 "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
330
331 /* create and initialize PDA for region parity */
332 /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
333 *prd_pda = rf_AllocPDAList(1);
334 rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
335 if (rf_parityLogDebug)
336 printf("[reading %d sectors of parity from region %d]\n",
337 (int)(*prd_pda)->numSector, regionID);
338 if ((*prd_pda)->next) {
339 (*prd_pda)->next = NULL;
340 printf("set prd_pda->next to NULL\n");
341 }
342
343 /* initialize DAG parameters */
344 bzero((char *)&tracerec,sizeof(tracerec));
345 (*prd_dag_h)->tracerec = &tracerec;
346 prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
347 prd_rdNode->params[0].p = *prd_pda;
348 prd_rdNode->params[1].p = parityBuffer;
349 prd_rdNode->params[2].v = 0;
350 prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
351 if (rf_validateDAGDebug)
352 rf_ValidateDAG(*prd_dag_h);
353 /* launch region parity read dag */
354 rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
355 (void *) prd_mcpair);
356 }
357
358 static void WriteRegionParity(
359 RF_RegionId_t regionID,
360 RF_MCPair_t *pwr_mcpair,
361 caddr_t parityBuffer,
362 RF_Raid_t *raidPtr,
363 RF_DagHeader_t **pwr_dag_h,
364 RF_AllocListElem_t **pwr_alloclist,
365 RF_PhysDiskAddr_t **pwr_pda)
366 {
367 /* Initiate the write of region parity to disk.
368 Once initiated, return to the calling routine.
369
370 NON-BLOCKING
371 */
372
373 RF_AccTraceEntry_t tracerec;
374 RF_DagNode_t *pwr_wrNode;
375
376 /* create DAG to write region log from disk */
377 rf_MakeAllocList(*pwr_alloclist);
378 *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
379 "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
380
381 /* create and initialize PDA for region parity */
382 /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
383 *pwr_pda = rf_AllocPDAList(1);
384 rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
385
386 /* initialize DAG parameters */
387 bzero((char *)&tracerec,sizeof(tracerec));
388 (*pwr_dag_h)->tracerec = &tracerec;
389 pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
390 pwr_wrNode->params[0].p = *pwr_pda;
391 /* pwr_wrNode->params[1] = parityBuffer; */
392 pwr_wrNode->params[2].v = 0;
393 pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
394
395 /* launch the dag to write region parity to disk */
396 rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
397 (void *) pwr_mcpair);
398 }
399
400 static void FlushLogsToDisk(
401 RF_Raid_t *raidPtr,
402 RF_ParityLog_t *logList)
403 {
404 /* Flush a linked list of core logs to the log disk.
405 Logs contain the disk location where they should be
406 written. Logs were written in FIFO order and that
407 order must be preserved.
408
409 Recommended optimizations:
410 1) allow multiple flushes to occur simultaneously
411 2) coalesce contiguous flush operations
412
413 BLOCKING
414 */
415
416 RF_ParityLog_t *log;
417 RF_RegionId_t regionID;
418 RF_MCPair_t *fwr_mcpair;
419 RF_DagHeader_t *fwr_dag_h;
420 RF_AllocListElem_t *fwr_alloclist;
421 RF_PhysDiskAddr_t *fwr_pda;
422
423 fwr_mcpair = rf_AllocMCPair();
424 RF_LOCK_MUTEX(fwr_mcpair->mutex);
425
426 RF_ASSERT(logList);
427 log = logList;
428 while (log)
429 {
430 regionID = log->regionID;
431
432 /* create and launch a DAG to write the core log */
433 if (rf_parityLogDebug)
434 printf("[initiating write of core log for region %d]\n", regionID);
435 fwr_mcpair->flag = RF_FALSE;
436 WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
437
438 /* wait for the DAG to complete */
439 #ifndef SIMULATE
440 while (!fwr_mcpair->flag)
441 RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
442 #endif /* !SIMULATE */
443 if (fwr_dag_h->status != rf_enable)
444 {
445 RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
446 RF_ASSERT(0);
447 }
448
449 /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
450 rf_FreePhysDiskAddr(fwr_pda);
451 rf_FreeDAG(fwr_dag_h);
452 rf_FreeAllocList(fwr_alloclist);
453
454 log = log->next;
455 }
456 RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
457 rf_FreeMCPair(fwr_mcpair);
458 rf_ReleaseParityLogs(raidPtr, logList);
459 }
460
461 static void ReintegrateRegion(
462 RF_Raid_t *raidPtr,
463 RF_RegionId_t regionID,
464 RF_ParityLog_t *coreLog)
465 {
466 RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair;
467 RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
468 RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
469 RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
470 caddr_t parityBuffer, regionBuffer=NULL;
471
472 /* Reintegrate a region (regionID).
473 1. acquire region and parity buffers
474 2. read log from disk
475 3. read parity from disk
476 4. apply log to parity
477 5. apply core log to parity
478 6. write new parity to disk
479
480 BLOCKING
481 */
482
483 if (rf_parityLogDebug)
484 printf("[reintegrating region %d]\n", regionID);
485
486 /* initiate read of region parity */
487 if (rf_parityLogDebug)
488 printf("[initiating read of parity for region %d]\n", regionID);
489 parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
490 prd_mcpair = rf_AllocMCPair();
491 RF_LOCK_MUTEX(prd_mcpair->mutex);
492 prd_mcpair->flag = RF_FALSE;
493 ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
494
495 /* if region log nonempty, initiate read */
496 if (raidPtr->regionInfo[regionID].diskCount > 0)
497 {
498 if (rf_parityLogDebug)
499 printf("[initiating read of disk log for region %d]\n", regionID);
500 regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
501 rrd_mcpair = rf_AllocMCPair();
502 RF_LOCK_MUTEX(rrd_mcpair->mutex);
503 rrd_mcpair->flag = RF_FALSE;
504 ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
505 }
506
507 /* wait on read of region parity to complete */
508 #ifndef SIMULATE
509 while (!prd_mcpair->flag) {
510 RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
511 }
512 #endif /* !SIMULATE */
513 RF_UNLOCK_MUTEX(prd_mcpair->mutex);
514 if (prd_dag_h->status != rf_enable)
515 {
516 RF_ERRORMSG("Unable to read parity from disk\n");
517 /* add code to fail the parity disk */
518 RF_ASSERT(0);
519 }
520
521 /* apply core log to parity */
522 /* if (coreLog)
523 ApplyLogsToParity(coreLog, parityBuffer); */
524
525 if (raidPtr->regionInfo[regionID].diskCount > 0)
526 {
527 /* wait on read of region log to complete */
528 #ifndef SIMULATE
529 while (!rrd_mcpair->flag)
530 RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
531 #endif /* !SIMULATE */
532 RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
533 if (rrd_dag_h->status != rf_enable)
534 {
535 RF_ERRORMSG("Unable to read region log from disk\n");
536 /* add code to fail the log disk */
537 RF_ASSERT(0);
538 }
539 /* apply region log to parity */
540 /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
541 /* release resources associated with region log */
542 /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
543 rf_FreePhysDiskAddr(rrd_pda);
544 rf_FreeDAG(rrd_dag_h);
545 rf_FreeAllocList(rrd_alloclist);
546 rf_FreeMCPair(rrd_mcpair);
547 ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
548 }
549
550 /* write reintegrated parity to disk */
551 if (rf_parityLogDebug)
552 printf("[initiating write of parity for region %d]\n", regionID);
553 pwr_mcpair = rf_AllocMCPair();
554 RF_LOCK_MUTEX(pwr_mcpair->mutex);
555 pwr_mcpair->flag = RF_FALSE;
556 WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
557 #ifndef SIMULATE
558 while (!pwr_mcpair->flag)
559 RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
560 #endif /* !SIMULATE */
561 RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
562 if (pwr_dag_h->status != rf_enable)
563 {
564 RF_ERRORMSG("Unable to write parity to disk\n");
565 /* add code to fail the parity disk */
566 RF_ASSERT(0);
567 }
568
569 /* release resources associated with read of old parity */
570 /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
571 rf_FreePhysDiskAddr(prd_pda);
572 rf_FreeDAG(prd_dag_h);
573 rf_FreeAllocList(prd_alloclist);
574 rf_FreeMCPair(prd_mcpair);
575
576 /* release resources associated with write of new parity */
577 ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
578 /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
579 rf_FreePhysDiskAddr(pwr_pda);
580 rf_FreeDAG(pwr_dag_h);
581 rf_FreeAllocList(pwr_alloclist);
582 rf_FreeMCPair(pwr_mcpair);
583
584 if (rf_parityLogDebug)
585 printf("[finished reintegrating region %d]\n", regionID);
586 }
587
588
589
590 static void ReintegrateLogs(
591 RF_Raid_t *raidPtr,
592 RF_ParityLog_t *logList)
593 {
594 RF_ParityLog_t *log, *freeLogList = NULL;
595 RF_ParityLogData_t *logData, *logDataList;
596 RF_RegionId_t regionID;
597
598 RF_ASSERT(logList);
599 while (logList)
600 {
601 log = logList;
602 logList = logList->next;
603 log->next = NULL;
604 regionID = log->regionID;
605 ReintegrateRegion(raidPtr, regionID, log);
606 log->numRecords = 0;
607
608 /* remove all items which are blocked on reintegration of this region */
609 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
610 logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
611 logDataList = logData;
612 while (logData)
613 {
614 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
615 logData = logData->next;
616 }
617 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
618
619 /* process blocked log data and clear reintInProgress flag for this region */
620 if (logDataList)
621 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
622 else
623 {
624 /* Enable flushing for this region. Holding both locks provides
625 a synchronization barrier with DumpParityLogToDisk
626 */
627 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
628 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
629 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
630 raidPtr->regionInfo[regionID].diskCount = 0;
631 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
632 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
633 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
634 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
635 }
636 /* if log wasn't used, attach it to the list of logs to be returned */
637 if (log)
638 {
639 log->next = freeLogList;
640 freeLogList = log;
641 }
642 }
643 if (freeLogList)
644 rf_ReleaseParityLogs(raidPtr, freeLogList);
645 }
646
647 int rf_ShutdownLogging(RF_Raid_t *raidPtr)
648 {
649 /* shutdown parity logging
650 1) disable parity logging in all regions
651 2) reintegrate all regions
652 */
653
654 RF_SectorCount_t diskCount;
655 RF_RegionId_t regionID;
656 RF_ParityLog_t *log;
657
658 if (rf_parityLogDebug)
659 printf("[shutting down parity logging]\n");
660 /* Since parity log maps are volatile, we must reintegrate all regions. */
661 if (rf_forceParityLogReint) {
662 for (regionID = 0; regionID < rf_numParityRegions; regionID++)
663 {
664 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
665 raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
666 log = raidPtr->regionInfo[regionID].coreLog;
667 raidPtr->regionInfo[regionID].coreLog = NULL;
668 diskCount = raidPtr->regionInfo[regionID].diskCount;
669 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
670 if (diskCount > 0 || log != NULL)
671 ReintegrateRegion(raidPtr, regionID, log);
672 if (log != NULL)
673 rf_ReleaseParityLogs(raidPtr, log);
674 }
675 }
676 if (rf_parityLogDebug)
677 {
678 printf("[parity logging disabled]\n");
679 printf("[should be done!]\n");
680 }
681 return(0);
682 }
683
684 int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
685 {
686 RF_ParityLog_t *reintQueue, *flushQueue;
687 int workNeeded, done = RF_FALSE;
688
689 rf_assign_threadid(); /* don't remove this line */
690
691 /* Main program for parity logging disk thread. This routine waits
692 for work to appear in either the flush or reintegration queues
693 and is responsible for flushing core logs to the log disk as
694 well as reintegrating parity regions.
695
696 BLOCKING
697 */
698
699 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
700
701 /*
702 * Inform our creator that we're running. Don't bother doing the
703 * mutex lock/unlock dance- we locked above, and we'll unlock
704 * below with nothing to do, yet.
705 */
706 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
707 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
708
709 /* empty the work queues */
710 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
711 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
712 workNeeded = (flushQueue || reintQueue);
713
714 while (!done)
715 {
716 while (workNeeded)
717 {
718 /* First, flush all logs in the flush queue, freeing buffers
719 Second, reintegrate all regions which are reported as full.
720 Third, append queued log data until blocked.
721
722 Note: Incoming appends (ParityLogAppend) can block on either
723 1. empty buffer pool
724 2. region under reintegration
725 To preserve a global FIFO ordering of appends, buffers are not
726 released to the world until those appends blocked on buffers are
727 removed from the append queue. Similarly, regions which are
728 reintegrated are not opened for general use until the append
729 queue has been emptied.
730 */
731
732 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
733
734 /* empty flushQueue, using free'd log buffers to process bufTail */
735 if (flushQueue)
736 FlushLogsToDisk(raidPtr, flushQueue);
737
738 /* empty reintQueue, flushing from reintTail as we go */
739 if (reintQueue)
740 ReintegrateLogs(raidPtr, reintQueue);
741
742 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
743 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
744 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
745 workNeeded = (flushQueue || reintQueue);
746 }
747 /* no work is needed at this point */
748 if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE)
749 {
750 /* shutdown parity logging
751 1. disable parity logging in all regions
752 2. reintegrate all regions
753 */
754 done = RF_TRUE; /* thread disabled, no work needed */
755 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
756 rf_ShutdownLogging(raidPtr);
757 }
758 if (!done)
759 {
760 /* thread enabled, no work needed, so sleep */
761 if (rf_parityLogDebug)
762 printf("[parity logging disk manager sleeping]\n");
763 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
764 if (rf_parityLogDebug)
765 printf("[parity logging disk manager just woke up]\n");
766 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
767 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
768 workNeeded = (flushQueue || reintQueue);
769 }
770 }
771 /*
772 * Announce that we're done.
773 */
774 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
775 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
776 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
777 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
778 #if defined(__NetBSD__) && defined(_KERNEL)
779 /*
780 * In the NetBSD kernel, the thread must exit; returning would
781 * cause the proc trampoline to attempt to return to userspace.
782 */
783 kthread_exit(0); /* does not return */
784 #else
785 return(0);
786 #endif
787 }
788
789 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
790