rf_paritylogDiskMgr.c revision 1.2 1 1.2 oster /* $NetBSD: rf_paritylogDiskMgr.c,v 1.2 1999/01/26 02:33:59 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: William V. Courtright II
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster /* Code for flushing and reintegration operations related to parity logging.
29 1.1 oster *
30 1.1 oster */
31 1.1 oster
32 1.1 oster #include "rf_archs.h"
33 1.1 oster
34 1.1 oster #if RF_INCLUDE_PARITYLOGGING > 0
35 1.1 oster
36 1.1 oster #include "rf_types.h"
37 1.1 oster #include "rf_threadstuff.h"
38 1.1 oster #include "rf_mcpair.h"
39 1.1 oster #include "rf_raid.h"
40 1.1 oster #include "rf_dag.h"
41 1.1 oster #include "rf_dagfuncs.h"
42 1.1 oster #include "rf_desc.h"
43 1.1 oster #include "rf_layout.h"
44 1.1 oster #include "rf_diskqueue.h"
45 1.1 oster #include "rf_paritylog.h"
46 1.1 oster #include "rf_general.h"
47 1.1 oster #include "rf_threadid.h"
48 1.1 oster #include "rf_etimer.h"
49 1.1 oster #include "rf_paritylogging.h"
50 1.1 oster #include "rf_engine.h"
51 1.1 oster #include "rf_dagutils.h"
52 1.1 oster #include "rf_map.h"
53 1.1 oster #include "rf_parityscan.h"
54 1.1 oster #include "rf_sys.h"
55 1.1 oster
56 1.1 oster #include "rf_paritylogDiskMgr.h"
57 1.1 oster
58 1.1 oster static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
59 1.1 oster
60 1.1 oster static caddr_t AcquireReintBuffer(pool)
61 1.1 oster RF_RegionBufferQueue_t *pool;
62 1.1 oster {
63 1.1 oster caddr_t bufPtr = NULL;
64 1.1 oster
65 1.1 oster /* Return a region buffer from the free list (pool).
66 1.1 oster If the free list is empty, WAIT.
67 1.1 oster BLOCKING */
68 1.1 oster
69 1.1 oster RF_LOCK_MUTEX(pool->mutex);
70 1.1 oster if (pool->availableBuffers > 0) {
71 1.1 oster bufPtr = pool->buffers[pool->availBuffersIndex];
72 1.1 oster pool->availableBuffers--;
73 1.1 oster pool->availBuffersIndex++;
74 1.1 oster if (pool->availBuffersIndex == pool->totalBuffers)
75 1.1 oster pool->availBuffersIndex = 0;
76 1.1 oster RF_UNLOCK_MUTEX(pool->mutex);
77 1.1 oster }
78 1.1 oster else {
79 1.1 oster RF_PANIC(); /* should never happen in currect config, single reint */
80 1.1 oster RF_WAIT_COND(pool->cond, pool->mutex);
81 1.1 oster }
82 1.1 oster return(bufPtr);
83 1.1 oster }
84 1.1 oster
85 1.1 oster static void ReleaseReintBuffer(
86 1.1 oster RF_RegionBufferQueue_t *pool,
87 1.1 oster caddr_t bufPtr)
88 1.1 oster {
89 1.1 oster /* Insert a region buffer (bufPtr) into the free list (pool).
90 1.1 oster NON-BLOCKING */
91 1.1 oster
92 1.1 oster RF_LOCK_MUTEX(pool->mutex);
93 1.1 oster pool->availableBuffers++;
94 1.1 oster pool->buffers[pool->emptyBuffersIndex] = bufPtr;
95 1.1 oster pool->emptyBuffersIndex++;
96 1.1 oster if (pool->emptyBuffersIndex == pool->totalBuffers)
97 1.1 oster pool->emptyBuffersIndex = 0;
98 1.1 oster RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
99 1.1 oster RF_UNLOCK_MUTEX(pool->mutex);
100 1.1 oster RF_SIGNAL_COND(pool->cond);
101 1.1 oster }
102 1.1 oster
103 1.1 oster
104 1.1 oster
105 1.1 oster static void ReadRegionLog(
106 1.1 oster RF_RegionId_t regionID,
107 1.1 oster RF_MCPair_t *rrd_mcpair,
108 1.1 oster caddr_t regionBuffer,
109 1.1 oster RF_Raid_t *raidPtr,
110 1.1 oster RF_DagHeader_t **rrd_dag_h,
111 1.1 oster RF_AllocListElem_t **rrd_alloclist,
112 1.1 oster RF_PhysDiskAddr_t **rrd_pda)
113 1.1 oster {
114 1.1 oster /* Initiate the read a region log from disk. Once initiated, return
115 1.1 oster to the calling routine.
116 1.1 oster
117 1.1 oster NON-BLOCKING
118 1.1 oster */
119 1.1 oster
120 1.1 oster RF_AccTraceEntry_t tracerec;
121 1.1 oster RF_DagNode_t *rrd_rdNode;
122 1.1 oster
123 1.1 oster /* create DAG to read region log from disk */
124 1.1 oster rf_MakeAllocList(*rrd_alloclist);
125 1.1 oster *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
126 1.1 oster "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
127 1.1 oster
128 1.1 oster /* create and initialize PDA for the core log */
129 1.1 oster /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
130 1.1 oster *rrd_pda = rf_AllocPDAList(1);
131 1.1 oster rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
132 1.1 oster (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
133 1.1 oster
134 1.1 oster if ((*rrd_pda)->next) {
135 1.1 oster (*rrd_pda)->next = NULL;
136 1.1 oster printf("set rrd_pda->next to NULL\n");
137 1.1 oster }
138 1.1 oster
139 1.1 oster /* initialize DAG parameters */
140 1.1 oster bzero((char *)&tracerec,sizeof(tracerec));
141 1.1 oster (*rrd_dag_h)->tracerec = &tracerec;
142 1.1 oster rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
143 1.1 oster rrd_rdNode->params[0].p = *rrd_pda;
144 1.1 oster /* rrd_rdNode->params[1] = regionBuffer; */
145 1.1 oster rrd_rdNode->params[2].v = 0;
146 1.1 oster rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
147 1.1 oster
148 1.1 oster /* launch region log read dag */
149 1.1 oster rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
150 1.1 oster (void *) rrd_mcpair);
151 1.1 oster }
152 1.1 oster
153 1.1 oster
154 1.1 oster
155 1.1 oster static void WriteCoreLog(
156 1.1 oster RF_ParityLog_t *log,
157 1.1 oster RF_MCPair_t *fwr_mcpair,
158 1.1 oster RF_Raid_t *raidPtr,
159 1.1 oster RF_DagHeader_t **fwr_dag_h,
160 1.1 oster RF_AllocListElem_t **fwr_alloclist,
161 1.1 oster RF_PhysDiskAddr_t **fwr_pda)
162 1.1 oster {
163 1.1 oster RF_RegionId_t regionID = log->regionID;
164 1.1 oster RF_AccTraceEntry_t tracerec;
165 1.1 oster RF_SectorNum_t regionOffset;
166 1.1 oster RF_DagNode_t *fwr_wrNode;
167 1.1 oster
168 1.1 oster /* Initiate the write of a core log to a region log disk.
169 1.1 oster Once initiated, return to the calling routine.
170 1.1 oster
171 1.1 oster NON-BLOCKING
172 1.1 oster */
173 1.1 oster
174 1.1 oster /* create DAG to write a core log to a region log disk */
175 1.1 oster rf_MakeAllocList(*fwr_alloclist);
176 1.1 oster *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
177 1.1 oster "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
178 1.1 oster
179 1.1 oster /* create and initialize PDA for the region log */
180 1.1 oster /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
181 1.1 oster *fwr_pda = rf_AllocPDAList(1);
182 1.1 oster regionOffset = log->diskOffset;
183 1.1 oster rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
184 1.1 oster (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
185 1.1 oster
186 1.1 oster /* initialize DAG parameters */
187 1.1 oster bzero((char *)&tracerec,sizeof(tracerec));
188 1.1 oster (*fwr_dag_h)->tracerec = &tracerec;
189 1.1 oster fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
190 1.1 oster fwr_wrNode->params[0].p = *fwr_pda;
191 1.1 oster /* fwr_wrNode->params[1] = log->bufPtr; */
192 1.1 oster fwr_wrNode->params[2].v = 0;
193 1.1 oster fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
194 1.1 oster
195 1.1 oster /* launch the dag to write the core log to disk */
196 1.1 oster rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
197 1.1 oster (void *) fwr_mcpair);
198 1.1 oster }
199 1.1 oster
200 1.1 oster
201 1.1 oster static void ReadRegionParity(
202 1.1 oster RF_RegionId_t regionID,
203 1.1 oster RF_MCPair_t *prd_mcpair,
204 1.1 oster caddr_t parityBuffer,
205 1.1 oster RF_Raid_t *raidPtr,
206 1.1 oster RF_DagHeader_t **prd_dag_h,
207 1.1 oster RF_AllocListElem_t **prd_alloclist,
208 1.1 oster RF_PhysDiskAddr_t **prd_pda)
209 1.1 oster {
210 1.1 oster /* Initiate the read region parity from disk.
211 1.1 oster Once initiated, return to the calling routine.
212 1.1 oster
213 1.1 oster NON-BLOCKING
214 1.1 oster */
215 1.1 oster
216 1.1 oster RF_AccTraceEntry_t tracerec;
217 1.1 oster RF_DagNode_t *prd_rdNode;
218 1.1 oster
219 1.1 oster /* create DAG to read region parity from disk */
220 1.1 oster rf_MakeAllocList(*prd_alloclist);
221 1.1 oster *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
222 1.1 oster "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
223 1.1 oster
224 1.1 oster /* create and initialize PDA for region parity */
225 1.1 oster /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
226 1.1 oster *prd_pda = rf_AllocPDAList(1);
227 1.1 oster rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
228 1.1 oster if (rf_parityLogDebug)
229 1.1 oster printf("[reading %d sectors of parity from region %d]\n",
230 1.1 oster (int)(*prd_pda)->numSector, regionID);
231 1.1 oster if ((*prd_pda)->next) {
232 1.1 oster (*prd_pda)->next = NULL;
233 1.1 oster printf("set prd_pda->next to NULL\n");
234 1.1 oster }
235 1.1 oster
236 1.1 oster /* initialize DAG parameters */
237 1.1 oster bzero((char *)&tracerec,sizeof(tracerec));
238 1.1 oster (*prd_dag_h)->tracerec = &tracerec;
239 1.1 oster prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
240 1.1 oster prd_rdNode->params[0].p = *prd_pda;
241 1.1 oster prd_rdNode->params[1].p = parityBuffer;
242 1.1 oster prd_rdNode->params[2].v = 0;
243 1.1 oster prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
244 1.1 oster if (rf_validateDAGDebug)
245 1.1 oster rf_ValidateDAG(*prd_dag_h);
246 1.1 oster /* launch region parity read dag */
247 1.1 oster rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
248 1.1 oster (void *) prd_mcpair);
249 1.1 oster }
250 1.1 oster
251 1.1 oster static void WriteRegionParity(
252 1.1 oster RF_RegionId_t regionID,
253 1.1 oster RF_MCPair_t *pwr_mcpair,
254 1.1 oster caddr_t parityBuffer,
255 1.1 oster RF_Raid_t *raidPtr,
256 1.1 oster RF_DagHeader_t **pwr_dag_h,
257 1.1 oster RF_AllocListElem_t **pwr_alloclist,
258 1.1 oster RF_PhysDiskAddr_t **pwr_pda)
259 1.1 oster {
260 1.1 oster /* Initiate the write of region parity to disk.
261 1.1 oster Once initiated, return to the calling routine.
262 1.1 oster
263 1.1 oster NON-BLOCKING
264 1.1 oster */
265 1.1 oster
266 1.1 oster RF_AccTraceEntry_t tracerec;
267 1.1 oster RF_DagNode_t *pwr_wrNode;
268 1.1 oster
269 1.1 oster /* create DAG to write region log from disk */
270 1.1 oster rf_MakeAllocList(*pwr_alloclist);
271 1.1 oster *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
272 1.1 oster "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
273 1.1 oster
274 1.1 oster /* create and initialize PDA for region parity */
275 1.1 oster /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
276 1.1 oster *pwr_pda = rf_AllocPDAList(1);
277 1.1 oster rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
278 1.1 oster
279 1.1 oster /* initialize DAG parameters */
280 1.1 oster bzero((char *)&tracerec,sizeof(tracerec));
281 1.1 oster (*pwr_dag_h)->tracerec = &tracerec;
282 1.1 oster pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
283 1.1 oster pwr_wrNode->params[0].p = *pwr_pda;
284 1.1 oster /* pwr_wrNode->params[1] = parityBuffer; */
285 1.1 oster pwr_wrNode->params[2].v = 0;
286 1.1 oster pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
287 1.1 oster
288 1.1 oster /* launch the dag to write region parity to disk */
289 1.1 oster rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
290 1.1 oster (void *) pwr_mcpair);
291 1.1 oster }
292 1.1 oster
293 1.1 oster static void FlushLogsToDisk(
294 1.1 oster RF_Raid_t *raidPtr,
295 1.1 oster RF_ParityLog_t *logList)
296 1.1 oster {
297 1.1 oster /* Flush a linked list of core logs to the log disk.
298 1.1 oster Logs contain the disk location where they should be
299 1.1 oster written. Logs were written in FIFO order and that
300 1.1 oster order must be preserved.
301 1.1 oster
302 1.1 oster Recommended optimizations:
303 1.1 oster 1) allow multiple flushes to occur simultaneously
304 1.1 oster 2) coalesce contiguous flush operations
305 1.1 oster
306 1.1 oster BLOCKING
307 1.1 oster */
308 1.1 oster
309 1.1 oster RF_ParityLog_t *log;
310 1.1 oster RF_RegionId_t regionID;
311 1.1 oster RF_MCPair_t *fwr_mcpair;
312 1.1 oster RF_DagHeader_t *fwr_dag_h;
313 1.1 oster RF_AllocListElem_t *fwr_alloclist;
314 1.1 oster RF_PhysDiskAddr_t *fwr_pda;
315 1.1 oster
316 1.1 oster fwr_mcpair = rf_AllocMCPair();
317 1.1 oster RF_LOCK_MUTEX(fwr_mcpair->mutex);
318 1.1 oster
319 1.1 oster RF_ASSERT(logList);
320 1.1 oster log = logList;
321 1.1 oster while (log)
322 1.1 oster {
323 1.1 oster regionID = log->regionID;
324 1.1 oster
325 1.1 oster /* create and launch a DAG to write the core log */
326 1.1 oster if (rf_parityLogDebug)
327 1.1 oster printf("[initiating write of core log for region %d]\n", regionID);
328 1.1 oster fwr_mcpair->flag = RF_FALSE;
329 1.1 oster WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
330 1.1 oster
331 1.1 oster /* wait for the DAG to complete */
332 1.1 oster while (!fwr_mcpair->flag)
333 1.1 oster RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
334 1.1 oster if (fwr_dag_h->status != rf_enable)
335 1.1 oster {
336 1.1 oster RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
337 1.1 oster RF_ASSERT(0);
338 1.1 oster }
339 1.1 oster
340 1.1 oster /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
341 1.1 oster rf_FreePhysDiskAddr(fwr_pda);
342 1.1 oster rf_FreeDAG(fwr_dag_h);
343 1.1 oster rf_FreeAllocList(fwr_alloclist);
344 1.1 oster
345 1.1 oster log = log->next;
346 1.1 oster }
347 1.1 oster RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
348 1.1 oster rf_FreeMCPair(fwr_mcpair);
349 1.1 oster rf_ReleaseParityLogs(raidPtr, logList);
350 1.1 oster }
351 1.1 oster
352 1.1 oster static void ReintegrateRegion(
353 1.1 oster RF_Raid_t *raidPtr,
354 1.1 oster RF_RegionId_t regionID,
355 1.1 oster RF_ParityLog_t *coreLog)
356 1.1 oster {
357 1.1 oster RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair;
358 1.1 oster RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
359 1.1 oster RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
360 1.1 oster RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
361 1.1 oster caddr_t parityBuffer, regionBuffer=NULL;
362 1.1 oster
363 1.1 oster /* Reintegrate a region (regionID).
364 1.1 oster 1. acquire region and parity buffers
365 1.1 oster 2. read log from disk
366 1.1 oster 3. read parity from disk
367 1.1 oster 4. apply log to parity
368 1.1 oster 5. apply core log to parity
369 1.1 oster 6. write new parity to disk
370 1.1 oster
371 1.1 oster BLOCKING
372 1.1 oster */
373 1.1 oster
374 1.1 oster if (rf_parityLogDebug)
375 1.1 oster printf("[reintegrating region %d]\n", regionID);
376 1.1 oster
377 1.1 oster /* initiate read of region parity */
378 1.1 oster if (rf_parityLogDebug)
379 1.1 oster printf("[initiating read of parity for region %d]\n", regionID);
380 1.1 oster parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
381 1.1 oster prd_mcpair = rf_AllocMCPair();
382 1.1 oster RF_LOCK_MUTEX(prd_mcpair->mutex);
383 1.1 oster prd_mcpair->flag = RF_FALSE;
384 1.1 oster ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
385 1.1 oster
386 1.1 oster /* if region log nonempty, initiate read */
387 1.1 oster if (raidPtr->regionInfo[regionID].diskCount > 0)
388 1.1 oster {
389 1.1 oster if (rf_parityLogDebug)
390 1.1 oster printf("[initiating read of disk log for region %d]\n", regionID);
391 1.1 oster regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
392 1.1 oster rrd_mcpair = rf_AllocMCPair();
393 1.1 oster RF_LOCK_MUTEX(rrd_mcpair->mutex);
394 1.1 oster rrd_mcpair->flag = RF_FALSE;
395 1.1 oster ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
396 1.1 oster }
397 1.1 oster
398 1.1 oster /* wait on read of region parity to complete */
399 1.1 oster while (!prd_mcpair->flag) {
400 1.1 oster RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
401 1.1 oster }
402 1.1 oster RF_UNLOCK_MUTEX(prd_mcpair->mutex);
403 1.1 oster if (prd_dag_h->status != rf_enable)
404 1.1 oster {
405 1.1 oster RF_ERRORMSG("Unable to read parity from disk\n");
406 1.1 oster /* add code to fail the parity disk */
407 1.1 oster RF_ASSERT(0);
408 1.1 oster }
409 1.1 oster
410 1.1 oster /* apply core log to parity */
411 1.1 oster /* if (coreLog)
412 1.1 oster ApplyLogsToParity(coreLog, parityBuffer); */
413 1.1 oster
414 1.1 oster if (raidPtr->regionInfo[regionID].diskCount > 0)
415 1.1 oster {
416 1.1 oster /* wait on read of region log to complete */
417 1.1 oster while (!rrd_mcpair->flag)
418 1.1 oster RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
419 1.1 oster RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
420 1.1 oster if (rrd_dag_h->status != rf_enable)
421 1.1 oster {
422 1.1 oster RF_ERRORMSG("Unable to read region log from disk\n");
423 1.1 oster /* add code to fail the log disk */
424 1.1 oster RF_ASSERT(0);
425 1.1 oster }
426 1.1 oster /* apply region log to parity */
427 1.1 oster /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
428 1.1 oster /* release resources associated with region log */
429 1.1 oster /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
430 1.1 oster rf_FreePhysDiskAddr(rrd_pda);
431 1.1 oster rf_FreeDAG(rrd_dag_h);
432 1.1 oster rf_FreeAllocList(rrd_alloclist);
433 1.1 oster rf_FreeMCPair(rrd_mcpair);
434 1.1 oster ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
435 1.1 oster }
436 1.1 oster
437 1.1 oster /* write reintegrated parity to disk */
438 1.1 oster if (rf_parityLogDebug)
439 1.1 oster printf("[initiating write of parity for region %d]\n", regionID);
440 1.1 oster pwr_mcpair = rf_AllocMCPair();
441 1.1 oster RF_LOCK_MUTEX(pwr_mcpair->mutex);
442 1.1 oster pwr_mcpair->flag = RF_FALSE;
443 1.1 oster WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
444 1.1 oster while (!pwr_mcpair->flag)
445 1.1 oster RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
446 1.1 oster RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
447 1.1 oster if (pwr_dag_h->status != rf_enable)
448 1.1 oster {
449 1.1 oster RF_ERRORMSG("Unable to write parity to disk\n");
450 1.1 oster /* add code to fail the parity disk */
451 1.1 oster RF_ASSERT(0);
452 1.1 oster }
453 1.1 oster
454 1.1 oster /* release resources associated with read of old parity */
455 1.1 oster /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
456 1.1 oster rf_FreePhysDiskAddr(prd_pda);
457 1.1 oster rf_FreeDAG(prd_dag_h);
458 1.1 oster rf_FreeAllocList(prd_alloclist);
459 1.1 oster rf_FreeMCPair(prd_mcpair);
460 1.1 oster
461 1.1 oster /* release resources associated with write of new parity */
462 1.1 oster ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
463 1.1 oster /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
464 1.1 oster rf_FreePhysDiskAddr(pwr_pda);
465 1.1 oster rf_FreeDAG(pwr_dag_h);
466 1.1 oster rf_FreeAllocList(pwr_alloclist);
467 1.1 oster rf_FreeMCPair(pwr_mcpair);
468 1.1 oster
469 1.1 oster if (rf_parityLogDebug)
470 1.1 oster printf("[finished reintegrating region %d]\n", regionID);
471 1.1 oster }
472 1.1 oster
473 1.1 oster
474 1.1 oster
475 1.1 oster static void ReintegrateLogs(
476 1.1 oster RF_Raid_t *raidPtr,
477 1.1 oster RF_ParityLog_t *logList)
478 1.1 oster {
479 1.1 oster RF_ParityLog_t *log, *freeLogList = NULL;
480 1.1 oster RF_ParityLogData_t *logData, *logDataList;
481 1.1 oster RF_RegionId_t regionID;
482 1.1 oster
483 1.1 oster RF_ASSERT(logList);
484 1.1 oster while (logList)
485 1.1 oster {
486 1.1 oster log = logList;
487 1.1 oster logList = logList->next;
488 1.1 oster log->next = NULL;
489 1.1 oster regionID = log->regionID;
490 1.1 oster ReintegrateRegion(raidPtr, regionID, log);
491 1.1 oster log->numRecords = 0;
492 1.1 oster
493 1.1 oster /* remove all items which are blocked on reintegration of this region */
494 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
495 1.1 oster logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
496 1.1 oster logDataList = logData;
497 1.1 oster while (logData)
498 1.1 oster {
499 1.1 oster logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
500 1.1 oster logData = logData->next;
501 1.1 oster }
502 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
503 1.1 oster
504 1.1 oster /* process blocked log data and clear reintInProgress flag for this region */
505 1.1 oster if (logDataList)
506 1.1 oster rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
507 1.1 oster else
508 1.1 oster {
509 1.1 oster /* Enable flushing for this region. Holding both locks provides
510 1.1 oster a synchronization barrier with DumpParityLogToDisk
511 1.1 oster */
512 1.1 oster RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
513 1.1 oster RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
514 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
515 1.1 oster raidPtr->regionInfo[regionID].diskCount = 0;
516 1.1 oster raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
517 1.1 oster RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
518 1.1 oster RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
519 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
520 1.1 oster }
521 1.1 oster /* if log wasn't used, attach it to the list of logs to be returned */
522 1.1 oster if (log)
523 1.1 oster {
524 1.1 oster log->next = freeLogList;
525 1.1 oster freeLogList = log;
526 1.1 oster }
527 1.1 oster }
528 1.1 oster if (freeLogList)
529 1.1 oster rf_ReleaseParityLogs(raidPtr, freeLogList);
530 1.1 oster }
531 1.1 oster
532 1.1 oster int rf_ShutdownLogging(RF_Raid_t *raidPtr)
533 1.1 oster {
534 1.1 oster /* shutdown parity logging
535 1.1 oster 1) disable parity logging in all regions
536 1.1 oster 2) reintegrate all regions
537 1.1 oster */
538 1.1 oster
539 1.1 oster RF_SectorCount_t diskCount;
540 1.1 oster RF_RegionId_t regionID;
541 1.1 oster RF_ParityLog_t *log;
542 1.1 oster
543 1.1 oster if (rf_parityLogDebug)
544 1.1 oster printf("[shutting down parity logging]\n");
545 1.1 oster /* Since parity log maps are volatile, we must reintegrate all regions. */
546 1.1 oster if (rf_forceParityLogReint) {
547 1.1 oster for (regionID = 0; regionID < rf_numParityRegions; regionID++)
548 1.1 oster {
549 1.1 oster RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
550 1.1 oster raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
551 1.1 oster log = raidPtr->regionInfo[regionID].coreLog;
552 1.1 oster raidPtr->regionInfo[regionID].coreLog = NULL;
553 1.1 oster diskCount = raidPtr->regionInfo[regionID].diskCount;
554 1.1 oster RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
555 1.1 oster if (diskCount > 0 || log != NULL)
556 1.1 oster ReintegrateRegion(raidPtr, regionID, log);
557 1.1 oster if (log != NULL)
558 1.1 oster rf_ReleaseParityLogs(raidPtr, log);
559 1.1 oster }
560 1.1 oster }
561 1.1 oster if (rf_parityLogDebug)
562 1.1 oster {
563 1.1 oster printf("[parity logging disabled]\n");
564 1.1 oster printf("[should be done!]\n");
565 1.1 oster }
566 1.1 oster return(0);
567 1.1 oster }
568 1.1 oster
569 1.1 oster int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
570 1.1 oster {
571 1.1 oster RF_ParityLog_t *reintQueue, *flushQueue;
572 1.1 oster int workNeeded, done = RF_FALSE;
573 1.1 oster
574 1.1 oster rf_assign_threadid(); /* don't remove this line */
575 1.1 oster
576 1.1 oster /* Main program for parity logging disk thread. This routine waits
577 1.1 oster for work to appear in either the flush or reintegration queues
578 1.1 oster and is responsible for flushing core logs to the log disk as
579 1.1 oster well as reintegrating parity regions.
580 1.1 oster
581 1.1 oster BLOCKING
582 1.1 oster */
583 1.1 oster
584 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
585 1.1 oster
586 1.1 oster /*
587 1.1 oster * Inform our creator that we're running. Don't bother doing the
588 1.1 oster * mutex lock/unlock dance- we locked above, and we'll unlock
589 1.1 oster * below with nothing to do, yet.
590 1.1 oster */
591 1.1 oster raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
592 1.1 oster RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
593 1.1 oster
594 1.1 oster /* empty the work queues */
595 1.1 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
596 1.1 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
597 1.1 oster workNeeded = (flushQueue || reintQueue);
598 1.1 oster
599 1.1 oster while (!done)
600 1.1 oster {
601 1.1 oster while (workNeeded)
602 1.1 oster {
603 1.1 oster /* First, flush all logs in the flush queue, freeing buffers
604 1.1 oster Second, reintegrate all regions which are reported as full.
605 1.1 oster Third, append queued log data until blocked.
606 1.1 oster
607 1.1 oster Note: Incoming appends (ParityLogAppend) can block on either
608 1.1 oster 1. empty buffer pool
609 1.1 oster 2. region under reintegration
610 1.1 oster To preserve a global FIFO ordering of appends, buffers are not
611 1.1 oster released to the world until those appends blocked on buffers are
612 1.1 oster removed from the append queue. Similarly, regions which are
613 1.1 oster reintegrated are not opened for general use until the append
614 1.1 oster queue has been emptied.
615 1.1 oster */
616 1.1 oster
617 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
618 1.1 oster
619 1.1 oster /* empty flushQueue, using free'd log buffers to process bufTail */
620 1.1 oster if (flushQueue)
621 1.1 oster FlushLogsToDisk(raidPtr, flushQueue);
622 1.1 oster
623 1.1 oster /* empty reintQueue, flushing from reintTail as we go */
624 1.1 oster if (reintQueue)
625 1.1 oster ReintegrateLogs(raidPtr, reintQueue);
626 1.1 oster
627 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
628 1.1 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
629 1.1 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
630 1.1 oster workNeeded = (flushQueue || reintQueue);
631 1.1 oster }
632 1.1 oster /* no work is needed at this point */
633 1.1 oster if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE)
634 1.1 oster {
635 1.1 oster /* shutdown parity logging
636 1.1 oster 1. disable parity logging in all regions
637 1.1 oster 2. reintegrate all regions
638 1.1 oster */
639 1.1 oster done = RF_TRUE; /* thread disabled, no work needed */
640 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
641 1.1 oster rf_ShutdownLogging(raidPtr);
642 1.1 oster }
643 1.1 oster if (!done)
644 1.1 oster {
645 1.1 oster /* thread enabled, no work needed, so sleep */
646 1.1 oster if (rf_parityLogDebug)
647 1.1 oster printf("[parity logging disk manager sleeping]\n");
648 1.1 oster RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
649 1.1 oster if (rf_parityLogDebug)
650 1.1 oster printf("[parity logging disk manager just woke up]\n");
651 1.1 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
652 1.1 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
653 1.1 oster workNeeded = (flushQueue || reintQueue);
654 1.1 oster }
655 1.1 oster }
656 1.1 oster /*
657 1.1 oster * Announce that we're done.
658 1.1 oster */
659 1.1 oster RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
660 1.1 oster raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
661 1.1 oster RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
662 1.1 oster RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
663 1.1 oster #if defined(__NetBSD__) && defined(_KERNEL)
664 1.1 oster /*
665 1.1 oster * In the NetBSD kernel, the thread must exit; returning would
666 1.1 oster * cause the proc trampoline to attempt to return to userspace.
667 1.1 oster */
668 1.1 oster kthread_exit(0); /* does not return */
669 1.1 oster #else
670 1.1 oster return(0);
671 1.1 oster #endif
672 1.1 oster }
673 1.1 oster
674 1.1 oster #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
675