rf_paritylogDiskMgr.c revision 1.1 1 1.1 oster /* $NetBSD: rf_paritylogDiskMgr.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: William V. Courtright II
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster /* Code for flushing and reintegration operations related to parity logging.
29 1.1 oster *
30 1.1 oster * :
31 1.1 oster * Log: rf_paritylogDiskMgr.c,v
32 1.1 oster * Revision 1.25 1996/07/28 20:31:39 jimz
33 1.1 oster * i386netbsd port
34 1.1 oster * true/false fixup
35 1.1 oster *
36 1.1 oster * Revision 1.24 1996/07/27 23:36:08 jimz
37 1.1 oster * Solaris port of simulator
38 1.1 oster *
39 1.1 oster * Revision 1.23 1996/07/22 19:52:16 jimz
40 1.1 oster * switched node params to RF_DagParam_t, a union of
41 1.1 oster * a 64-bit int and a void *, for better portability
42 1.1 oster * attempted hpux port, but failed partway through for
43 1.1 oster * lack of a single C compiler capable of compiling all
44 1.1 oster * source files
45 1.1 oster *
46 1.1 oster * Revision 1.22 1996/06/11 10:17:33 jimz
47 1.1 oster * Put in thread startup/shutdown mechanism for proper synchronization
48 1.1 oster * with start and end of day routines.
49 1.1 oster *
50 1.1 oster * Revision 1.21 1996/06/09 02:36:46 jimz
51 1.1 oster * lots of little crufty cleanup- fixup whitespace
52 1.1 oster * issues, comment #ifdefs, improve typing in some
53 1.1 oster * places (esp size-related)
54 1.1 oster *
55 1.1 oster * Revision 1.20 1996/06/07 21:33:04 jimz
56 1.1 oster * begin using consistent types for sector numbers,
57 1.1 oster * stripe numbers, row+col numbers, recon unit numbers
58 1.1 oster *
59 1.1 oster * Revision 1.19 1996/06/05 18:06:02 jimz
60 1.1 oster * Major code cleanup. The Great Renaming is now done.
61 1.1 oster * Better modularity. Better typing. Fixed a bunch of
62 1.1 oster * synchronization bugs. Made a lot of global stuff
63 1.1 oster * per-desc or per-array. Removed dead code.
64 1.1 oster *
65 1.1 oster * Revision 1.18 1996/06/02 17:31:48 jimz
66 1.1 oster * Moved a lot of global stuff into array structure, where it belongs.
67 1.1 oster * Fixed up paritylogging, pss modules in this manner. Some general
68 1.1 oster * code cleanup. Removed lots of dead code, some dead files.
69 1.1 oster *
70 1.1 oster * Revision 1.17 1996/05/31 22:26:54 jimz
71 1.1 oster * fix a lot of mapping problems, memory allocation problems
72 1.1 oster * found some weird lock issues, fixed 'em
73 1.1 oster * more code cleanup
74 1.1 oster *
75 1.1 oster * Revision 1.16 1996/05/30 23:22:16 jimz
76 1.1 oster * bugfixes of serialization, timing problems
77 1.1 oster * more cleanup
78 1.1 oster *
79 1.1 oster * Revision 1.15 1996/05/30 12:59:18 jimz
80 1.1 oster * make etimer happier, more portable
81 1.1 oster *
82 1.1 oster * Revision 1.14 1996/05/30 11:29:41 jimz
83 1.1 oster * Numerous bug fixes. Stripe lock release code disagreed with the taking code
84 1.1 oster * about when stripes should be locked (I made it consistent: no parity, no lock)
85 1.1 oster * There was a lot of extra serialization of I/Os which I've removed- a lot of
86 1.1 oster * it was to calculate values for the cache code, which is no longer with us.
87 1.1 oster * More types, function, macro cleanup. Added code to properly quiesce the array
88 1.1 oster * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
89 1.1 oster * before. Fixed memory allocation, freeing bugs.
90 1.1 oster *
91 1.1 oster * Revision 1.13 1996/05/27 18:56:37 jimz
92 1.1 oster * more code cleanup
93 1.1 oster * better typing
94 1.1 oster * compiles in all 3 environments
95 1.1 oster *
96 1.1 oster * Revision 1.12 1996/05/24 22:17:04 jimz
97 1.1 oster * continue code + namespace cleanup
98 1.1 oster * typed a bunch of flags
99 1.1 oster *
100 1.1 oster * Revision 1.11 1996/05/24 04:28:55 jimz
101 1.1 oster * release cleanup ckpt
102 1.1 oster *
103 1.1 oster * Revision 1.10 1996/05/23 21:46:35 jimz
104 1.1 oster * checkpoint in code cleanup (release prep)
105 1.1 oster * lots of types, function names have been fixed
106 1.1 oster *
107 1.1 oster * Revision 1.9 1996/05/23 00:33:23 jimz
108 1.1 oster * code cleanup: move all debug decls to rf_options.c, all extern
109 1.1 oster * debug decls to rf_options.h, all debug vars preceded by rf_
110 1.1 oster *
111 1.1 oster * Revision 1.8 1996/05/18 19:51:34 jimz
112 1.1 oster * major code cleanup- fix syntax, make some types consistent,
113 1.1 oster * add prototypes, clean out dead code, et cetera
114 1.1 oster *
115 1.1 oster * Revision 1.7 1995/12/12 18:10:06 jimz
116 1.1 oster * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
117 1.1 oster * fix 80-column brain damage in comments
118 1.1 oster *
119 1.1 oster * Revision 1.6 1995/12/06 20:58:27 wvcii
120 1.1 oster * added prototypes
121 1.1 oster *
122 1.1 oster * Revision 1.5 1995/11/30 16:06:05 wvcii
123 1.1 oster * added copyright info
124 1.1 oster *
125 1.1 oster * Revision 1.4 1995/10/09 22:41:10 wvcii
126 1.1 oster * minor bug fix
127 1.1 oster *
128 1.1 oster * Revision 1.3 1995/10/08 20:43:47 wvcii
129 1.1 oster * lots of random debugging - debugging still incomplete
130 1.1 oster *
131 1.1 oster * Revision 1.2 1995/09/07 15:52:19 jimz
132 1.1 oster * noop compile when INCLUDE_PARITYLOGGING not defined
133 1.1 oster *
134 1.1 oster * Revision 1.1 1995/09/06 19:24:44 wvcii
135 1.1 oster * Initial revision
136 1.1 oster *
137 1.1 oster */
138 1.1 oster
139 1.1 oster #include "rf_archs.h"
140 1.1 oster
141 1.1 oster #if RF_INCLUDE_PARITYLOGGING > 0
142 1.1 oster
143 1.1 oster #include "rf_types.h"
144 1.1 oster #include "rf_threadstuff.h"
145 1.1 oster #include "rf_mcpair.h"
146 1.1 oster #include "rf_raid.h"
147 1.1 oster #include "rf_dag.h"
148 1.1 oster #include "rf_dagfuncs.h"
149 1.1 oster #include "rf_desc.h"
150 1.1 oster #include "rf_layout.h"
151 1.1 oster #include "rf_diskqueue.h"
152 1.1 oster #include "rf_paritylog.h"
153 1.1 oster #include "rf_general.h"
154 1.1 oster #include "rf_threadid.h"
155 1.1 oster #include "rf_etimer.h"
156 1.1 oster #include "rf_paritylogging.h"
157 1.1 oster #include "rf_engine.h"
158 1.1 oster #include "rf_dagutils.h"
159 1.1 oster #include "rf_map.h"
160 1.1 oster #include "rf_parityscan.h"
161 1.1 oster #include "rf_sys.h"
162 1.1 oster
163 1.1 oster #include "rf_paritylogDiskMgr.h"
164 1.1 oster
165 1.1 oster static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
166 1.1 oster
167 1.1 oster static caddr_t AcquireReintBuffer(pool)
168 1.1 oster RF_RegionBufferQueue_t *pool;
169 1.1 oster {
170 1.1 oster caddr_t bufPtr = NULL;
171 1.1 oster
172 1.1 oster /* Return a region buffer from the free list (pool).
173 1.1 oster If the free list is empty, WAIT.
174 1.1 oster BLOCKING */
175 1.1 oster
176 1.1 oster RF_LOCK_MUTEX(pool->mutex);
177 1.1 oster if (pool->availableBuffers > 0) {
178 1.1 oster bufPtr = pool->buffers[pool->availBuffersIndex];
179 1.1 oster pool->availableBuffers--;
180 1.1 oster pool->availBuffersIndex++;
181 1.1 oster if (pool->availBuffersIndex == pool->totalBuffers)
182 1.1 oster pool->availBuffersIndex = 0;
183 1.1 oster RF_UNLOCK_MUTEX(pool->mutex);
184 1.1 oster }
185 1.1 oster else {
186 1.1 oster RF_PANIC(); /* should never happen in currect config, single reint */
187 1.1 oster RF_WAIT_COND(pool->cond, pool->mutex);
188 1.1 oster }
189 1.1 oster return(bufPtr);
190 1.1 oster }
191 1.1 oster
192 1.1 oster static void ReleaseReintBuffer(
193 1.1 oster RF_RegionBufferQueue_t *pool,
194 1.1 oster caddr_t bufPtr)
195 1.1 oster {
196 1.1 oster /* Insert a region buffer (bufPtr) into the free list (pool).
197 1.1 oster NON-BLOCKING */
198 1.1 oster
199 1.1 oster RF_LOCK_MUTEX(pool->mutex);
200 1.1 oster pool->availableBuffers++;
201 1.1 oster pool->buffers[pool->emptyBuffersIndex] = bufPtr;
202 1.1 oster pool->emptyBuffersIndex++;
203 1.1 oster if (pool->emptyBuffersIndex == pool->totalBuffers)
204 1.1 oster pool->emptyBuffersIndex = 0;
205 1.1 oster RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
206 1.1 oster RF_UNLOCK_MUTEX(pool->mutex);
207 1.1 oster RF_SIGNAL_COND(pool->cond);
208 1.1 oster }
209 1.1 oster
210 1.1 oster
211 1.1 oster
212 1.1 oster static void ReadRegionLog(
213 1.1 oster RF_RegionId_t regionID,
214 1.1 oster RF_MCPair_t *rrd_mcpair,
215 1.1 oster caddr_t regionBuffer,
216 1.1 oster RF_Raid_t *raidPtr,
217 1.1 oster RF_DagHeader_t **rrd_dag_h,
218 1.1 oster RF_AllocListElem_t **rrd_alloclist,
219 1.1 oster RF_PhysDiskAddr_t **rrd_pda)
220 1.1 oster {
221 1.1 oster /* Initiate the read a region log from disk. Once initiated, return
222 1.1 oster to the calling routine.
223 1.1 oster
224 1.1 oster NON-BLOCKING
225 1.1 oster */
226 1.1 oster
227 1.1 oster RF_AccTraceEntry_t tracerec;
228 1.1 oster RF_DagNode_t *rrd_rdNode;
229 1.1 oster
230 1.1 oster /* create DAG to read region log from disk */
231 1.1 oster rf_MakeAllocList(*rrd_alloclist);
232 1.1 oster *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
233 1.1 oster "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
234 1.1 oster
235 1.1 oster /* create and initialize PDA for the core log */
236 1.1 oster /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
237 1.1 oster *rrd_pda = rf_AllocPDAList(1);
238 1.1 oster rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
239 1.1 oster (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
240 1.1 oster
241 1.1 oster if ((*rrd_pda)->next) {
242 1.1 oster (*rrd_pda)->next = NULL;
243 1.1 oster printf("set rrd_pda->next to NULL\n");
244 1.1 oster }
245 1.1 oster
246 1.1 oster /* initialize DAG parameters */
247 1.1 oster bzero((char *)&tracerec,sizeof(tracerec));
248 1.1 oster (*rrd_dag_h)->tracerec = &tracerec;
249 1.1 oster rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
250 1.1 oster rrd_rdNode->params[0].p = *rrd_pda;
251 1.1 oster /* rrd_rdNode->params[1] = regionBuffer; */
252 1.1 oster rrd_rdNode->params[2].v = 0;
253 1.1 oster rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
254 1.1 oster
255 1.1 oster /* launch region log read dag */
256 1.1 oster rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
257 1.1 oster (void *) rrd_mcpair);
258 1.1 oster }
259 1.1 oster
260 1.1 oster
261 1.1 oster
262 1.1 oster static void WriteCoreLog(
263 1.1 oster RF_ParityLog_t *log,
264 1.1 oster RF_MCPair_t *fwr_mcpair,
265 1.1 oster RF_Raid_t *raidPtr,
266 1.1 oster RF_DagHeader_t **fwr_dag_h,
267 1.1 oster RF_AllocListElem_t **fwr_alloclist,
268 1.1 oster RF_PhysDiskAddr_t **fwr_pda)
269 1.1 oster {
270 1.1 oster RF_RegionId_t regionID = log->regionID;
271 1.1 oster RF_AccTraceEntry_t tracerec;
272 1.1 oster RF_SectorNum_t regionOffset;
273 1.1 oster RF_DagNode_t *fwr_wrNode;
274 1.1 oster
275 1.1 oster /* Initiate the write of a core log to a region log disk.
276 1.1 oster Once initiated, return to the calling routine.
277 1.1 oster
278 1.1 oster NON-BLOCKING
279 1.1 oster */
280 1.1 oster
281 1.1 oster /* create DAG to write a core log to a region log disk */
282 1.1 oster rf_MakeAllocList(*fwr_alloclist);
283 1.1 oster *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
284 1.1 oster "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
285 1.1 oster
286 1.1 oster /* create and initialize PDA for the region log */
287 1.1 oster /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
288 1.1 oster *fwr_pda = rf_AllocPDAList(1);
289 1.1 oster regionOffset = log->diskOffset;
290 1.1 oster rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
291 1.1 oster (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
292 1.1 oster
293 1.1 oster /* initialize DAG parameters */
294 1.1 oster bzero((char *)&tracerec,sizeof(tracerec));
295 1.1 oster (*fwr_dag_h)->tracerec = &tracerec;
296 1.1 oster fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
297 1.1 oster fwr_wrNode->params[0].p = *fwr_pda;
298 1.1 oster /* fwr_wrNode->params[1] = log->bufPtr; */
299 1.1 oster fwr_wrNode->params[2].v = 0;
300 1.1 oster fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
301 1.1 oster
302 1.1 oster /* launch the dag to write the core log to disk */
303 1.1 oster rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
304 1.1 oster (void *) fwr_mcpair);
305 1.1 oster }
306 1.1 oster
307 1.1 oster
308 1.1 oster static void ReadRegionParity(
309 1.1 oster RF_RegionId_t regionID,
310 1.1 oster RF_MCPair_t *prd_mcpair,
311 1.1 oster caddr_t parityBuffer,
312 1.1 oster RF_Raid_t *raidPtr,
313 1.1 oster RF_DagHeader_t **prd_dag_h,
314 1.1 oster RF_AllocListElem_t **prd_alloclist,
315 1.1 oster RF_PhysDiskAddr_t **prd_pda)
316 1.1 oster {
317 1.1 oster /* Initiate the read region parity from disk.
318 1.1 oster Once initiated, return to the calling routine.
319 1.1 oster
320 1.1 oster NON-BLOCKING
321 1.1 oster */
322 1.1 oster
323 1.1 oster RF_AccTraceEntry_t tracerec;
324 1.1 oster RF_DagNode_t *prd_rdNode;
325 1.1 oster
326 1.1 oster /* create DAG to read region parity from disk */
327 1.1 oster rf_MakeAllocList(*prd_alloclist);
328 1.1 oster *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
329 1.1 oster "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
330 1.1 oster
331 1.1 oster /* create and initialize PDA for region parity */
332 1.1 oster /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
333 1.1 oster *prd_pda = rf_AllocPDAList(1);
334 1.1 oster rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
335 1.1 oster if (rf_parityLogDebug)
336 1.1 oster printf("[reading %d sectors of parity from region %d]\n",
337 1.1 oster (int)(*prd_pda)->numSector, regionID);
338 1.1 oster if ((*prd_pda)->next) {
339 1.1 oster (*prd_pda)->next = NULL;
340 1.1 oster printf("set prd_pda->next to NULL\n");
341 1.1 oster }
342 1.1 oster
343 1.1 oster /* initialize DAG parameters */
344 1.1 oster bzero((char *)&tracerec,sizeof(tracerec));
345 1.1 oster (*prd_dag_h)->tracerec = &tracerec;
346 1.1 oster prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
347 1.1 oster prd_rdNode->params[0].p = *prd_pda;
348 1.1 oster prd_rdNode->params[1].p = parityBuffer;
349 1.1 oster prd_rdNode->params[2].v = 0;
350 1.1 oster prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
351 1.1 oster if (rf_validateDAGDebug)
352 1.1 oster rf_ValidateDAG(*prd_dag_h);
353 1.1 oster /* launch region parity read dag */
354 1.1 oster rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
355 1.1 oster (void *) prd_mcpair);
356 1.1 oster }
357 1.1 oster
358 1.1 oster static void WriteRegionParity(
359 1.1 oster RF_RegionId_t regionID,
360 1.1 oster RF_MCPair_t *pwr_mcpair,
361 1.1 oster caddr_t parityBuffer,
362 1.1 oster RF_Raid_t *raidPtr,
363 1.1 oster RF_DagHeader_t **pwr_dag_h,
364 1.1 oster RF_AllocListElem_t **pwr_alloclist,
365 1.1 oster RF_PhysDiskAddr_t **pwr_pda)
366 1.1 oster {
367 1.1 oster /* Initiate the write of region parity to disk.
368 1.1 oster Once initiated, return to the calling routine.
369 1.1 oster
370 1.1 oster NON-BLOCKING
371 1.1 oster */
372 1.1 oster
373 1.1 oster RF_AccTraceEntry_t tracerec;
374 1.1 oster RF_DagNode_t *pwr_wrNode;
375 1.1 oster
376 1.1 oster /* create DAG to write region log from disk */
377 1.1 oster rf_MakeAllocList(*pwr_alloclist);
378 1.1 oster *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
379 1.1 oster "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
380 1.1 oster
381 1.1 oster /* create and initialize PDA for region parity */
382 1.1 oster /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
383 1.1 oster *pwr_pda = rf_AllocPDAList(1);
384 1.1 oster rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
385 1.1 oster
386 1.1 oster /* initialize DAG parameters */
387 1.1 oster bzero((char *)&tracerec,sizeof(tracerec));
388 1.1 oster (*pwr_dag_h)->tracerec = &tracerec;
389 1.1 oster pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
390 1.1 oster pwr_wrNode->params[0].p = *pwr_pda;
391 1.1 oster /* pwr_wrNode->params[1] = parityBuffer; */
392 1.1 oster pwr_wrNode->params[2].v = 0;
393 1.1 oster pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
394 1.1 oster
395 1.1 oster /* launch the dag to write region parity to disk */
396 1.1 oster rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
397 1.1 oster (void *) pwr_mcpair);
398 1.1 oster }
399 1.1 oster
400 1.1 oster static void FlushLogsToDisk(
401 1.1 oster RF_Raid_t *raidPtr,
402 1.1 oster RF_ParityLog_t *logList)
403 1.1 oster {
404 1.1 oster /* Flush a linked list of core logs to the log disk.
405 1.1 oster Logs contain the disk location where they should be
406 1.1 oster written. Logs were written in FIFO order and that
407 1.1 oster order must be preserved.
408 1.1 oster
409 1.1 oster Recommended optimizations:
410 1.1 oster 1) allow multiple flushes to occur simultaneously
411 1.1 oster 2) coalesce contiguous flush operations
412 1.1 oster
413 1.1 oster BLOCKING
414 1.1 oster */
415 1.1 oster
416 1.1 oster RF_ParityLog_t *log;
417 1.1 oster RF_RegionId_t regionID;
418 1.1 oster RF_MCPair_t *fwr_mcpair;
419 1.1 oster RF_DagHeader_t *fwr_dag_h;
420 1.1 oster RF_AllocListElem_t *fwr_alloclist;
421 1.1 oster RF_PhysDiskAddr_t *fwr_pda;
422 1.1 oster
423 1.1 oster fwr_mcpair = rf_AllocMCPair();
424 1.1 oster RF_LOCK_MUTEX(fwr_mcpair->mutex);
425 1.1 oster
426 1.1 oster RF_ASSERT(logList);
427 1.1 oster log = logList;
428 1.1 oster while (log)
429 1.1 oster {
430 1.1 oster regionID = log->regionID;
431 1.1 oster
432 1.1 oster /* create and launch a DAG to write the core log */
433 1.1 oster if (rf_parityLogDebug)
434 1.1 oster printf("[initiating write of core log for region %d]\n", regionID);
435 1.1 oster fwr_mcpair->flag = RF_FALSE;
436 1.1 oster WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
437 1.1 oster
438 1.1 oster /* wait for the DAG to complete */
439 1.1 oster #ifndef SIMULATE
440 1.1 oster while (!fwr_mcpair->flag)
441 1.1 oster RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
442 1.1 oster #endif /* !SIMULATE */
443 1.1 oster if (fwr_dag_h->status != rf_enable)
444 1.1 oster {
445 1.1 oster RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
446 1.1 oster RF_ASSERT(0);
447 1.1 oster }
448 1.1 oster
449 1.1 oster /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
450 1.1 oster rf_FreePhysDiskAddr(fwr_pda);
451 1.1 oster rf_FreeDAG(fwr_dag_h);
452 1.1 oster rf_FreeAllocList(fwr_alloclist);
453 1.1 oster
454 1.1 oster log = log->next;
455 1.1 oster }
456 1.1 oster RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
457 1.1 oster rf_FreeMCPair(fwr_mcpair);
458 1.1 oster rf_ReleaseParityLogs(raidPtr, logList);
459 1.1 oster }
460 1.1 oster
461 1.1 oster static void ReintegrateRegion(
462 1.1 oster RF_Raid_t *raidPtr,
463 1.1 oster RF_RegionId_t regionID,
464 1.1 oster RF_ParityLog_t *coreLog)
465 1.1 oster {
466 1.1 oster RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair;
467 1.1 oster RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
468 1.1 oster RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
469 1.1 oster RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
470 1.1 oster caddr_t parityBuffer, regionBuffer=NULL;
471 1.1 oster
472 1.1 oster /* Reintegrate a region (regionID).
473 1.1 oster 1. acquire region and parity buffers
474 1.1 oster 2. read log from disk
475 1.1 oster 3. read parity from disk
476 1.1 oster 4. apply log to parity
477 1.1 oster 5. apply core log to parity
478 1.1 oster 6. write new parity to disk
479 1.1 oster
480 1.1 oster BLOCKING
481 1.1 oster */
482 1.1 oster
483 1.1 oster if (rf_parityLogDebug)
484 1.1 oster printf("[reintegrating region %d]\n", regionID);
485 1.1 oster
486 1.1 oster /* initiate read of region parity */
487 1.1 oster if (rf_parityLogDebug)
488 1.1 oster printf("[initiating read of parity for region %d]\n", regionID);
489 1.1 oster parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
490 1.1 oster prd_mcpair = rf_AllocMCPair();
491 1.1 oster RF_LOCK_MUTEX(prd_mcpair->mutex);
492 1.1 oster prd_mcpair->flag = RF_FALSE;
493 1.1 oster ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
494 1.1 oster
495 1.1 oster /* if region log nonempty, initiate read */
496 1.1 oster if (raidPtr->regionInfo[regionID].diskCount > 0)
497 1.1 oster {
498 1.1 oster if (rf_parityLogDebug)
499 1.1 oster printf("[initiating read of disk log for region %d]\n", regionID);
500 1.1 oster regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
501 1.1 oster rrd_mcpair = rf_AllocMCPair();
502 1.1 oster RF_LOCK_MUTEX(rrd_mcpair->mutex);
503 1.1 oster rrd_mcpair->flag = RF_FALSE;
504 1.1 oster ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
505 1.1 oster }
506 1.1 oster
507 1.1 oster /* wait on read of region parity to complete */
508 1.1 oster #ifndef SIMULATE
509 1.1 oster while (!prd_mcpair->flag) {
510 1.1 oster RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
511 1.1 oster }
512 1.1 oster #endif /* !SIMULATE */
513 1.1 oster RF_UNLOCK_MUTEX(prd_mcpair->mutex);
514 1.1 oster if (prd_dag_h->status != rf_enable)
515 1.1 oster {
516 1.1 oster RF_ERRORMSG("Unable to read parity from disk\n");
517 1.1 oster /* add code to fail the parity disk */
518 1.1 oster RF_ASSERT(0);
519 1.1 oster }
520 1.1 oster
521 1.1 oster /* apply core log to parity */
522 1.1 oster /* if (coreLog)
523 1.1 oster ApplyLogsToParity(coreLog, parityBuffer); */
524 1.1 oster
525 1.1 oster if (raidPtr->regionInfo[regionID].diskCount > 0)
526 1.1 oster {
527 1.1 oster /* wait on read of region log to complete */
528 1.1 oster #ifndef SIMULATE
529 1.1 oster while (!rrd_mcpair->flag)
530 1.1 oster RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
531 1.1 oster #endif /* !SIMULATE */
532 1.1 oster RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
533 1.1 oster if (rrd_dag_h->status != rf_enable)
534 1.1 oster {
535 1.1 oster RF_ERRORMSG("Unable to read region log from disk\n");
536 1.1 oster /* add code to fail the log disk */
537 1.1 oster RF_ASSERT(0);
538 1.1 oster }
539 1.1 oster /* apply region log to parity */
540 1.1 oster /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
541 1.1 oster /* release resources associated with region log */
542 1.1 oster /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
543 1.1 oster rf_FreePhysDiskAddr(rrd_pda);
544 1.1 oster rf_FreeDAG(rrd_dag_h);
545 1.1 oster rf_FreeAllocList(rrd_alloclist);
546 1.1 oster rf_FreeMCPair(rrd_mcpair);
547 1.1 oster ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
548 1.1 oster }
549 1.1 oster
550 1.1 oster /* write reintegrated parity to disk */
551 1.1 oster if (rf_parityLogDebug)
552 1.1 oster printf("[initiating write of parity for region %d]\n", regionID);
553 1.1 oster pwr_mcpair = rf_AllocMCPair();
554 1.1 oster RF_LOCK_MUTEX(pwr_mcpair->mutex);
555 1.1 oster pwr_mcpair->flag = RF_FALSE;
556 1.1 oster WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
557 1.1 oster #ifndef SIMULATE
558 1.1 oster while (!pwr_mcpair->flag)
559 1.1 oster RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
560 1.1 oster #endif /* !SIMULATE */
561 1.1 oster RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
562 1.1 oster if (pwr_dag_h->status != rf_enable)
563 1.1 oster {
564 1.1 oster RF_ERRORMSG("Unable to write parity to disk\n");
565 1.1 oster /* add code to fail the parity disk */
566 1.1 oster RF_ASSERT(0);
567 1.1 oster }
568 1.1 oster
569 1.1 oster /* release resources associated with read of old parity */
570 1.1 oster /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
571 1.1 oster rf_FreePhysDiskAddr(prd_pda);
572 1.1 oster rf_FreeDAG(prd_dag_h);
573 1.1 oster rf_FreeAllocList(prd_alloclist);
574 1.1 oster rf_FreeMCPair(prd_mcpair);
575 1.1 oster
576 1.1 oster /* release resources associated with write of new parity */
577 1.1 oster ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
578 1.1 oster /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
579 1.1 oster rf_FreePhysDiskAddr(pwr_pda);
580 1.1 oster rf_FreeDAG(pwr_dag_h);
581 1.1 oster rf_FreeAllocList(pwr_alloclist);
582 1.1 oster rf_FreeMCPair(pwr_mcpair);
583 1.1 oster
584 1.1 oster if (rf_parityLogDebug)
585 1.1 oster printf("[finished reintegrating region %d]\n", regionID);
586 1.1 oster }
587 1.1 oster
588 1.1 oster
589 1.1 oster
590 1.1 oster static void ReintegrateLogs(
591 1.1 oster RF_Raid_t *raidPtr,
592 1.1 oster RF_ParityLog_t *logList)
593 1.1 oster {
594 1.1 oster RF_ParityLog_t *log, *freeLogList = NULL;
595 1.1 oster RF_ParityLogData_t *logData, *logDataList;
596 1.1 oster RF_RegionId_t regionID;
597 1.1 oster
598 1.1 oster RF_ASSERT(logList);
599 1.1 oster while (logList)
600 1.1 oster {
601 1.1 oster log = logList;
602 1.1 oster logList = logList->next;
603 1.1 oster log->next = NULL;
604 1.1 oster regionID = log->regionID;
605 1.1 oster ReintegrateRegion(raidPtr, regionID, log);
606 1.1 oster log->numRecords = 0;
607 1.1 oster
608 1.1 oster /* remove all items which are blocked on reintegration of this region */
609 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
610 1.1 oster logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
611 1.1 oster logDataList = logData;
612 1.1 oster while (logData)
613 1.1 oster {
614 1.1 oster logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
615 1.1 oster logData = logData->next;
616 1.1 oster }
617 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
618 1.1 oster
619 1.1 oster /* process blocked log data and clear reintInProgress flag for this region */
620 1.1 oster if (logDataList)
621 1.1 oster rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
622 1.1 oster else
623 1.1 oster {
624 1.1 oster /* Enable flushing for this region. Holding both locks provides
625 1.1 oster a synchronization barrier with DumpParityLogToDisk
626 1.1 oster */
627 1.1 oster RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
628 1.1 oster RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
629 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
630 1.1 oster raidPtr->regionInfo[regionID].diskCount = 0;
631 1.1 oster raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
632 1.1 oster RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
633 1.1 oster RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
634 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
635 1.1 oster }
636 1.1 oster /* if log wasn't used, attach it to the list of logs to be returned */
637 1.1 oster if (log)
638 1.1 oster {
639 1.1 oster log->next = freeLogList;
640 1.1 oster freeLogList = log;
641 1.1 oster }
642 1.1 oster }
643 1.1 oster if (freeLogList)
644 1.1 oster rf_ReleaseParityLogs(raidPtr, freeLogList);
645 1.1 oster }
646 1.1 oster
647 1.1 oster int rf_ShutdownLogging(RF_Raid_t *raidPtr)
648 1.1 oster {
649 1.1 oster /* shutdown parity logging
650 1.1 oster 1) disable parity logging in all regions
651 1.1 oster 2) reintegrate all regions
652 1.1 oster */
653 1.1 oster
654 1.1 oster RF_SectorCount_t diskCount;
655 1.1 oster RF_RegionId_t regionID;
656 1.1 oster RF_ParityLog_t *log;
657 1.1 oster
658 1.1 oster if (rf_parityLogDebug)
659 1.1 oster printf("[shutting down parity logging]\n");
660 1.1 oster /* Since parity log maps are volatile, we must reintegrate all regions. */
661 1.1 oster if (rf_forceParityLogReint) {
662 1.1 oster for (regionID = 0; regionID < rf_numParityRegions; regionID++)
663 1.1 oster {
664 1.1 oster RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
665 1.1 oster raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
666 1.1 oster log = raidPtr->regionInfo[regionID].coreLog;
667 1.1 oster raidPtr->regionInfo[regionID].coreLog = NULL;
668 1.1 oster diskCount = raidPtr->regionInfo[regionID].diskCount;
669 1.1 oster RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
670 1.1 oster if (diskCount > 0 || log != NULL)
671 1.1 oster ReintegrateRegion(raidPtr, regionID, log);
672 1.1 oster if (log != NULL)
673 1.1 oster rf_ReleaseParityLogs(raidPtr, log);
674 1.1 oster }
675 1.1 oster }
676 1.1 oster if (rf_parityLogDebug)
677 1.1 oster {
678 1.1 oster printf("[parity logging disabled]\n");
679 1.1 oster printf("[should be done!]\n");
680 1.1 oster }
681 1.1 oster return(0);
682 1.1 oster }
683 1.1 oster
684 1.1 oster int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
685 1.1 oster {
686 1.1 oster RF_ParityLog_t *reintQueue, *flushQueue;
687 1.1 oster int workNeeded, done = RF_FALSE;
688 1.1 oster
689 1.1 oster rf_assign_threadid(); /* don't remove this line */
690 1.1 oster
691 1.1 oster /* Main program for parity logging disk thread. This routine waits
692 1.1 oster for work to appear in either the flush or reintegration queues
693 1.1 oster and is responsible for flushing core logs to the log disk as
694 1.1 oster well as reintegrating parity regions.
695 1.1 oster
696 1.1 oster BLOCKING
697 1.1 oster */
698 1.1 oster
699 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
700 1.1 oster
701 1.1 oster /*
702 1.1 oster * Inform our creator that we're running. Don't bother doing the
703 1.1 oster * mutex lock/unlock dance- we locked above, and we'll unlock
704 1.1 oster * below with nothing to do, yet.
705 1.1 oster */
706 1.1 oster raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
707 1.1 oster RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
708 1.1 oster
709 1.1 oster /* empty the work queues */
710 1.1 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
711 1.1 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
712 1.1 oster workNeeded = (flushQueue || reintQueue);
713 1.1 oster
714 1.1 oster while (!done)
715 1.1 oster {
716 1.1 oster while (workNeeded)
717 1.1 oster {
718 1.1 oster /* First, flush all logs in the flush queue, freeing buffers
719 1.1 oster Second, reintegrate all regions which are reported as full.
720 1.1 oster Third, append queued log data until blocked.
721 1.1 oster
722 1.1 oster Note: Incoming appends (ParityLogAppend) can block on either
723 1.1 oster 1. empty buffer pool
724 1.1 oster 2. region under reintegration
725 1.1 oster To preserve a global FIFO ordering of appends, buffers are not
726 1.1 oster released to the world until those appends blocked on buffers are
727 1.1 oster removed from the append queue. Similarly, regions which are
728 1.1 oster reintegrated are not opened for general use until the append
729 1.1 oster queue has been emptied.
730 1.1 oster */
731 1.1 oster
732 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
733 1.1 oster
734 1.1 oster /* empty flushQueue, using free'd log buffers to process bufTail */
735 1.1 oster if (flushQueue)
736 1.1 oster FlushLogsToDisk(raidPtr, flushQueue);
737 1.1 oster
738 1.1 oster /* empty reintQueue, flushing from reintTail as we go */
739 1.1 oster if (reintQueue)
740 1.1 oster ReintegrateLogs(raidPtr, reintQueue);
741 1.1 oster
742 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
743 1.1 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
744 1.1 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
745 1.1 oster workNeeded = (flushQueue || reintQueue);
746 1.1 oster }
747 1.1 oster /* no work is needed at this point */
748 1.1 oster if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE)
749 1.1 oster {
750 1.1 oster /* shutdown parity logging
751 1.1 oster 1. disable parity logging in all regions
752 1.1 oster 2. reintegrate all regions
753 1.1 oster */
754 1.1 oster done = RF_TRUE; /* thread disabled, no work needed */
755 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
756 1.1 oster rf_ShutdownLogging(raidPtr);
757 1.1 oster }
758 1.1 oster if (!done)
759 1.1 oster {
760 1.1 oster /* thread enabled, no work needed, so sleep */
761 1.1 oster if (rf_parityLogDebug)
762 1.1 oster printf("[parity logging disk manager sleeping]\n");
763 1.1 oster RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
764 1.1 oster if (rf_parityLogDebug)
765 1.1 oster printf("[parity logging disk manager just woke up]\n");
766 1.1 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
767 1.1 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
768 1.1 oster workNeeded = (flushQueue || reintQueue);
769 1.1 oster }
770 1.1 oster }
771 1.1 oster /*
772 1.1 oster * Announce that we're done.
773 1.1 oster */
774 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
775 1.1 oster raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
776 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
777 1.1 oster RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
778 1.1 oster #if defined(__NetBSD__) && defined(_KERNEL)
779 1.1 oster /*
780 1.1 oster * In the NetBSD kernel, the thread must exit; returning would
781 1.1 oster * cause the proc trampoline to attempt to return to userspace.
782 1.1 oster */
783 1.1 oster kthread_exit(0); /* does not return */
784 1.1 oster #else
785 1.1 oster return(0);
786 1.1 oster #endif
787 1.1 oster }
788 1.1 oster
789 1.1 oster #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
790