/* $NetBSD: rf_paritylogging.c,v 1.2 1999/01/26 02:34:00 oster Exp $ */
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Parity logging configuration, DAG selection, and mapping are
 * implemented here.
 */

#include "rf_archs.h"

#if RF_INCLUDE_PARITYLOGGING > 0

#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_dagffrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_threadid.h"
#include "rf_paritylog.h"
#include "rf_paritylogDiskMgr.h"
#include "rf_paritylogging.h"
#include "rf_parityloggingdags.h"
#include "rf_general.h"
#include "rf_map.h"
#include "rf_utils.h"
#include "rf_shutdown.h"

typedef struct RF_ParityLoggingConfigInfo_s {
  RF_RowCol_t **stripeIdentifier;  /* filled in at config time & used by IdentifyStripe */
} RF_ParityLoggingConfigInfo_t;

static void FreeRegionInfo(RF_Raid_t *raidPtr, RF_RegionId_t regionID);
static void rf_ShutdownParityLogging(RF_ThreadArg_t arg);
static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg);
static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg);
static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg);
static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg);
static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg);
int rf_ConfigureParityLogging(
  RF_ShutdownList_t  **listp,
  RF_Raid_t           *raidPtr,
  RF_Config_t         *cfgPtr)
{
  int i, j, startdisk, rc;
  RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity;
  RF_SectorCount_t parityBufferCapacity, maxRegionParityRange;
  RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  RF_ParityLoggingConfigInfo_t *info;
  RF_ParityLog_t *l = NULL, *next;
  caddr_t lHeapPtr;

  /*
   * We create multiple entries on the shutdown list here, since
   * this configuration routine is fairly complicated in and of
   * itself, and this makes backing out of a failed configuration
   * much simpler.
   */

  raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG;

  /* create a parity logging configuration structure */
  RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), (RF_ParityLoggingConfigInfo_t *), raidPtr->cleanupList);
  if (info == NULL)
    return(ENOMEM);
  layoutPtr->layoutSpecificInfo = (void *) info;

  RF_ASSERT(raidPtr->numRow == 1);

  /* the stripe identifier must identify the disks in each stripe,
   * IN THE ORDER THAT THEY APPEAR IN THE STRIPE.
   */
  info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), (raidPtr->numCol), raidPtr->cleanupList);
  if (info->stripeIdentifier == NULL)
    return(ENOMEM);

  startdisk = 0;
  for (i = 0; i < (raidPtr->numCol); i++)
  {
    for (j = 0; j < (raidPtr->numCol); j++)
    {
      info->stripeIdentifier[i][j] = (startdisk + j) % (raidPtr->numCol - 1);
    }
    if ((--startdisk) < 0)
      startdisk = raidPtr->numCol - 1 - 1;
  }

  /* fill in the remaining layout parameters */
  layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
  layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
  layoutPtr->numParityCol = 1;
  layoutPtr->numParityLogCol = 1;
  layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - layoutPtr->numParityLogCol;
  layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
  layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
  raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;

  raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;

  /* configure parity log parameters

     parameter                comment/constraints
     ----------------         -------------------
   * numParityRegions         all regions (except possibly last) of equal size
   * totalInCoreLogCapacity   amount of memory in bytes available for in-core logs (default 1 MB)
   # numSectorsPerLog         capacity of an in-core log in sectors (1 disk track)
     numParityLogs            total number of in-core logs, should be at least numParityRegions
     regionLogCapacity        size of a region log (except possibly last one) in sectors
     totalLogCapacity         total amount of log space in sectors

   * denotes a user settable parameter.
   # logs are fixed to be the size of a disk track, value #defined in rf_paritylog.h
  */
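
  /*
   * Worked example (illustrative numbers only, not the actual defaults):
   * with numSectorsPerLog = 64 and bytesPerSector = 512, each in-core log
   * occupies 64 * 512 = 32 KB, so a 1 MB rf_totalInCoreLogCapacity yields
   * 1 MB / 32 KB = 32 in-core logs.  regionLogCapacity is simply
   * totalLogCapacity / rf_numParityRegions, rounded down below to a whole
   * number of logs.
   */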

  totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol;
  raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions;
  if (rf_parityLogDebug)
    printf("bytes per sector %d\n", raidPtr->bytesPerSector);

  /* reduce fragmentation within a disk region by adjusting the number of
     regions in an attempt to allow an integral number of logs to fit into
     a disk region */
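  /* For example (hypothetical values), if regionLogCapacity were 1000
     sectors and numSectorsPerLog were 64, 40 sectors per region would go
     unused; the search below nudges rf_numParityRegions up or down a few
     steps looking for a region size that wastes fewer sectors. */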
  fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog;
  if (fragmentation > 0)
    for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++)
    {
      if (((totalLogCapacity / (rf_numParityRegions + i)) % raidPtr->numSectorsPerLog) < fragmentation)
      {
        rf_numParityRegions++;
        raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions;
        fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog;
      }
      if (((totalLogCapacity / (rf_numParityRegions - i)) % raidPtr->numSectorsPerLog) < fragmentation)
      {
        rf_numParityRegions--;
        raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions;
        fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog;
      }
    }
  /* ensure an integral number of logs per region */
  raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / raidPtr->numSectorsPerLog) * raidPtr->numSectorsPerLog;

  raidPtr->numParityLogs = rf_totalInCoreLogCapacity / (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog);
  /* to avoid deadlock, must ensure that enough logs exist for each region to
     have one simultaneously */
  if (raidPtr->numParityLogs < rf_numParityRegions)
    raidPtr->numParityLogs = rf_numParityRegions;

  /* create region information structs */
  RF_Malloc(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t)), (RF_RegionInfo_t *));
  if (raidPtr->regionInfo == NULL)
    return(ENOMEM);

  /* last region may not be full capacity */
  lastRegionCapacity = raidPtr->regionLogCapacity;
  while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + lastRegionCapacity > totalLogCapacity)
    lastRegionCapacity = lastRegionCapacity - raidPtr->numSectorsPerLog;

  raidPtr->regionParityRange = raidPtr->sectorsPerDisk / rf_numParityRegions;
  maxRegionParityRange = raidPtr->regionParityRange;
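  /* Note: the last region absorbs the remainder of sectorsPerDisk and may
     therefore map more parity sectors than regionParityRange; if so,
     maxRegionParityRange is revised upward in the region initialization
     loop below. */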

  /* i can't remember why this line is in the code -wvcii 6/30/95 */
  /* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0)
       regionParityRange++; */

  /* build pool of unused parity logs */
  RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, (caddr_t));
  if (raidPtr->parityLogBufferHeap == NULL)
    return(ENOMEM);
  lHeapPtr = raidPtr->parityLogBufferHeap;
  rc = rf_mutex_init(&raidPtr->parityLogPool.mutex);
  if (rc) {
    RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
    return(ENOMEM);
  }
  for (i = 0; i < raidPtr->numParityLogs; i++)
  {
    if (i == 0)
    {
      RF_Calloc(raidPtr->parityLogPool.parityLogs, 1, sizeof(RF_ParityLog_t), (RF_ParityLog_t *));
      if (raidPtr->parityLogPool.parityLogs == NULL) {
        RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
        return(ENOMEM);
      }
      l = raidPtr->parityLogPool.parityLogs;
    }
    else
    {
      RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t), (RF_ParityLog_t *));
      if (l->next == NULL) {
        RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
        for (l = raidPtr->parityLogPool.parityLogs; l; l = next) {
          next = l->next;
          if (l->records)
            RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t)));
          RF_Free(l, sizeof(RF_ParityLog_t));
        }
        return(ENOMEM);
      }
      l = l->next;
    }
    l->bufPtr = lHeapPtr;
    lHeapPtr += raidPtr->numSectorsPerLog * raidPtr->bytesPerSector;
    RF_Malloc(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t)), (RF_ParityLogRecord_t *));
    if (l->records == NULL) {
      RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
      for (l = raidPtr->parityLogPool.parityLogs; l; l = next) {
        next = l->next;
        if (l->records)
          RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t)));
        RF_Free(l, sizeof(RF_ParityLog_t));
      }
      return(ENOMEM);
    }
  }
  rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr);
  if (rc) {
    RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    rf_ShutdownParityLoggingPool(raidPtr);
    return(rc);
  }

  /* build pool of region buffers */
  rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex);
  if (rc) {
    RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    return(ENOMEM);
  }
  rc = rf_cond_init(&raidPtr->regionBufferPool.cond);
  if (rc) {
    RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
    return(ENOMEM);
  }
  raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * raidPtr->bytesPerSector;
  printf("regionBufferPool.bufferSize %d\n", raidPtr->regionBufferPool.bufferSize);
  raidPtr->regionBufferPool.totalBuffers = 1;  /* for now, only one region at a time may be reintegrated */
  raidPtr->regionBufferPool.availableBuffers = raidPtr->regionBufferPool.totalBuffers;
  raidPtr->regionBufferPool.availBuffersIndex = 0;
  raidPtr->regionBufferPool.emptyBuffersIndex = 0;
  RF_Malloc(raidPtr->regionBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *));
  if (raidPtr->regionBufferPool.buffers == NULL) {
    rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
    rf_cond_destroy(&raidPtr->regionBufferPool.cond);
    return(ENOMEM);
  }
  for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) {
    RF_Malloc(raidPtr->regionBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char), (caddr_t));
    if (raidPtr->regionBufferPool.buffers[i] == NULL) {
      rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
      rf_cond_destroy(&raidPtr->regionBufferPool.cond);
      for (j = 0; j < i; j++) {
        RF_Free(raidPtr->regionBufferPool.buffers[j], raidPtr->regionBufferPool.bufferSize * sizeof(char));
      }
      RF_Free(raidPtr->regionBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t));
      return(ENOMEM);
    }
    printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i,
      (long) raidPtr->regionBufferPool.buffers[i]);
  }
  rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingRegionBufferPool, raidPtr);
  if (rc) {
    RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    rf_ShutdownParityLoggingRegionBufferPool(raidPtr);
    return(rc);
  }

  /* build pool of parity buffers */
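  /* The parity buffer holds the on-disk parity of the region currently
     being reintegrated, so it is sized to cover the parity range of one
     region. */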
  parityBufferCapacity = maxRegionParityRange;
  rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex);
  if (rc) {
    RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    return(rc);
  }
  rc = rf_cond_init(&raidPtr->parityBufferPool.cond);
  if (rc) {
    RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
    return(ENOMEM);
  }
  raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * raidPtr->bytesPerSector;
  printf("parityBufferPool.bufferSize %d\n", raidPtr->parityBufferPool.bufferSize);
  raidPtr->parityBufferPool.totalBuffers = 1;  /* for now, only one region at a time may be reintegrated */
  raidPtr->parityBufferPool.availableBuffers = raidPtr->parityBufferPool.totalBuffers;
  raidPtr->parityBufferPool.availBuffersIndex = 0;
  raidPtr->parityBufferPool.emptyBuffersIndex = 0;
  RF_Malloc(raidPtr->parityBufferPool.buffers, raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *));
  if (raidPtr->parityBufferPool.buffers == NULL) {
    rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
    rf_cond_destroy(&raidPtr->parityBufferPool.cond);
    return(ENOMEM);
  }
  for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) {
    RF_Malloc(raidPtr->parityBufferPool.buffers[i], raidPtr->parityBufferPool.bufferSize * sizeof(char), (caddr_t));
    if (raidPtr->parityBufferPool.buffers[i] == NULL) {
      rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
      rf_cond_destroy(&raidPtr->parityBufferPool.cond);
      for (j = 0; j < i; j++) {
        RF_Free(raidPtr->parityBufferPool.buffers[j], raidPtr->parityBufferPool.bufferSize * sizeof(char));
      }
      RF_Free(raidPtr->parityBufferPool.buffers, raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t));
      return(ENOMEM);
    }
    printf("parityBufferPool.buffers[%d] = %lx\n", i,
      (long) raidPtr->parityBufferPool.buffers[i]);
  }
  rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingParityBufferPool, raidPtr);
  if (rc) {
    RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    rf_ShutdownParityLoggingParityBufferPool(raidPtr);
    return(rc);
  }

  /* initialize parityLogDiskQueue */
  rc = rf_create_managed_mutex(listp, &raidPtr->parityLogDiskQueue.mutex);
  if (rc) {
    RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    return(rc);
  }
  rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond);
  if (rc) {
    RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    return(rc);
  }
  raidPtr->parityLogDiskQueue.flushQueue = NULL;
  raidPtr->parityLogDiskQueue.reintQueue = NULL;
  raidPtr->parityLogDiskQueue.bufHead = NULL;
  raidPtr->parityLogDiskQueue.bufTail = NULL;
  raidPtr->parityLogDiskQueue.reintHead = NULL;
  raidPtr->parityLogDiskQueue.reintTail = NULL;
  raidPtr->parityLogDiskQueue.logBlockHead = NULL;
  raidPtr->parityLogDiskQueue.logBlockTail = NULL;
  raidPtr->parityLogDiskQueue.reintBlockHead = NULL;
  raidPtr->parityLogDiskQueue.reintBlockTail = NULL;
  raidPtr->parityLogDiskQueue.freeDataList = NULL;
  raidPtr->parityLogDiskQueue.freeCommonList = NULL;

  rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingDiskQueue, raidPtr);
  if (rc) {
    RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    return(rc);
  }

  for (i = 0; i < rf_numParityRegions; i++)
  {
    rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex);
    if (rc) {
      RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
        __LINE__, rc);
      for (j = 0; j < i; j++)
        FreeRegionInfo(raidPtr, j);
      RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t)));
      return(ENOMEM);
    }
    rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex);
    if (rc) {
      RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
        __LINE__, rc);
      rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
      for (j = 0; j < i; j++)
        FreeRegionInfo(raidPtr, j);
      RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t)));
      return(ENOMEM);
    }
    raidPtr->regionInfo[i].reintInProgress = RF_FALSE;
    raidPtr->regionInfo[i].regionStartAddr = raidPtr->regionLogCapacity * i;
    raidPtr->regionInfo[i].parityStartAddr = raidPtr->regionParityRange * i;
    if (i < rf_numParityRegions - 1)
    {
      raidPtr->regionInfo[i].capacity = raidPtr->regionLogCapacity;
      raidPtr->regionInfo[i].numSectorsParity = raidPtr->regionParityRange;
    }
    else
    {
      raidPtr->regionInfo[i].capacity = lastRegionCapacity;
      raidPtr->regionInfo[i].numSectorsParity = raidPtr->sectorsPerDisk - raidPtr->regionParityRange * i;
      if (raidPtr->regionInfo[i].numSectorsParity > maxRegionParityRange)
        maxRegionParityRange = raidPtr->regionInfo[i].numSectorsParity;
    }
    raidPtr->regionInfo[i].diskCount = 0;
    RF_ASSERT(raidPtr->regionInfo[i].capacity + raidPtr->regionInfo[i].regionStartAddr <= totalLogCapacity);
    RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + raidPtr->regionInfo[i].numSectorsParity <= raidPtr->sectorsPerDisk);
    RF_Malloc(raidPtr->regionInfo[i].diskMap, (raidPtr->regionInfo[i].capacity * sizeof(RF_DiskMap_t)), (RF_DiskMap_t *));
    if (raidPtr->regionInfo[i].diskMap == NULL) {
      rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
      rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex);
      for (j = 0; j < i; j++)
        FreeRegionInfo(raidPtr, j);
      RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t)));
      return(ENOMEM);
    }
    raidPtr->regionInfo[i].loggingEnabled = RF_FALSE;
    raidPtr->regionInfo[i].coreLog = NULL;
  }
  rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingRegionInfo, raidPtr);
  if (rc) {
    RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    rf_ShutdownParityLoggingRegionInfo(raidPtr);
    return(rc);
  }

  RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0);
  raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED;
  rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, rf_ParityLoggingDiskManager, raidPtr);
  if (rc) {
    raidPtr->parityLogDiskQueue.threadState = 0;
    RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n",
      __FILE__, __LINE__, rc);
    return(ENOMEM);
  }
  /* wait for thread to start */
  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) {
    RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
  }
  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

  rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr);
  if (rc) {
    RF_ERRORMSG1("Got rc=%d adding parity logging shutdown event\n", rc);
    rf_ShutdownParityLogging(raidPtr);
    return(rc);
  }

  if (rf_parityLogDebug)
  {
    printf(" size of disk log in sectors: %d\n",
      (int) totalLogCapacity);
    printf(" total number of parity regions is %d\n", (int) rf_numParityRegions);
    printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity);
    printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation);
    printf(" total number of parity logs is %d\n", raidPtr->numParityLogs);
    printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog);
    printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity);
  }

  rf_EnableParityLogging(raidPtr);

  return(0);
}

static void FreeRegionInfo(
  RF_Raid_t      *raidPtr,
  RF_RegionId_t   regionID)
{
  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
  RF_Free(raidPtr->regionInfo[regionID].diskMap, (raidPtr->regionInfo[regionID].capacity * sizeof(RF_DiskMap_t)));
  if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) {
    rf_ReleaseParityLogs(raidPtr, raidPtr->regionInfo[regionID].coreLog);
    raidPtr->regionInfo[regionID].coreLog = NULL;
  }
  else {
    RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL);
    RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0);
  }
  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
  rf_mutex_destroy(&raidPtr->regionInfo[regionID].mutex);
  rf_mutex_destroy(&raidPtr->regionInfo[regionID].reintMutex);
}


static void FreeParityLogQueue(
  RF_Raid_t            *raidPtr,
  RF_ParityLogQueue_t  *queue)
{
  RF_ParityLog_t *l1, *l2;

  RF_LOCK_MUTEX(queue->mutex);
  l1 = queue->parityLogs;
  while (l1)
  {
    l2 = l1;
    l1 = l2->next;
    RF_Free(l2->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t)));
    RF_Free(l2, sizeof(RF_ParityLog_t));
  }
  RF_UNLOCK_MUTEX(queue->mutex);
  rf_mutex_destroy(&queue->mutex);
}


static void FreeRegionBufferQueue(RF_RegionBufferQueue_t *queue)
{
  int i;

  RF_LOCK_MUTEX(queue->mutex);
  if (queue->availableBuffers != queue->totalBuffers)
  {
    printf("Attempt to free region queue which is still in use!\n");
    RF_ASSERT(0);
  }
  for (i = 0; i < queue->totalBuffers; i++)
    RF_Free(queue->buffers[i], queue->bufferSize);
  RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t));
  RF_UNLOCK_MUTEX(queue->mutex);
  rf_mutex_destroy(&queue->mutex);
}

static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg)
{
  RF_Raid_t *raidPtr;
  RF_RegionId_t i;

  raidPtr = (RF_Raid_t *) arg;
  if (rf_parityLogDebug) {
    int tid;
    rf_get_threadid(tid);
    printf("[%d] ShutdownParityLoggingRegionInfo\n", tid);
  }
  /* free region information structs */
  for (i = 0; i < rf_numParityRegions; i++)
    FreeRegionInfo(raidPtr, i);
  RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t)));
  raidPtr->regionInfo = NULL;
}

static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg)
{
  RF_Raid_t *raidPtr;

  raidPtr = (RF_Raid_t *) arg;
  if (rf_parityLogDebug) {
    int tid;
    rf_get_threadid(tid);
    printf("[%d] ShutdownParityLoggingPool\n", tid);
  }
  /* free contents of parityLogPool */
  FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool);
  RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
}

static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg)
{
  RF_Raid_t *raidPtr;

  raidPtr = (RF_Raid_t *) arg;
  if (rf_parityLogDebug) {
    int tid;
    rf_get_threadid(tid);
    printf("[%d] ShutdownParityLoggingRegionBufferPool\n", tid);
  }
  FreeRegionBufferQueue(&raidPtr->regionBufferPool);
}

static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg)
{
  RF_Raid_t *raidPtr;

  raidPtr = (RF_Raid_t *) arg;
  if (rf_parityLogDebug) {
    int tid;
    rf_get_threadid(tid);
    printf("[%d] ShutdownParityLoggingParityBufferPool\n", tid);
  }
  FreeRegionBufferQueue(&raidPtr->parityBufferPool);
}

static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg)
{
  RF_ParityLogData_t *d;
  RF_CommonLogData_t *c;
  RF_Raid_t *raidPtr;

  raidPtr = (RF_Raid_t *) arg;
  if (rf_parityLogDebug) {
    int tid;
    rf_get_threadid(tid);
    printf("[%d] ShutdownParityLoggingDiskQueue\n", tid);
  }
  /* free disk manager stuff */
  RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL);
  RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL);
  RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL);
  RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL);
  while (raidPtr->parityLogDiskQueue.freeDataList)
  {
    d = raidPtr->parityLogDiskQueue.freeDataList;
    raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
    RF_Free(d, sizeof(RF_ParityLogData_t));
  }
  while (raidPtr->parityLogDiskQueue.freeCommonList)
  {
    c = raidPtr->parityLogDiskQueue.freeCommonList;
    rf_mutex_destroy(&c->mutex);
    raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
    RF_Free(c, sizeof(RF_CommonLogData_t));
  }
}

static void rf_ShutdownParityLogging(RF_ThreadArg_t arg)
{
  RF_Raid_t *raidPtr;

  raidPtr = (RF_Raid_t *) arg;
  if (rf_parityLogDebug) {
    int tid;
    rf_get_threadid(tid);
    printf("[%d] ShutdownParityLogging\n", tid);
  }

  /* shutdown disk thread */
  /* This has the desirable side-effect of forcing all regions to be
     reintegrated. This is necessary since all parity log maps are
     currently held in volatile memory. */

  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE;
  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
  /*
   * pLogDiskThread will now terminate when its queues are cleared;
   * wait for it to be done.
   */
  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) {
    RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
  }
  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  if (rf_parityLogDebug) {
    int tid;
    rf_get_threadid(tid);
    printf("[%d] ShutdownParityLogging done (thread completed)\n", tid);
  }
}

int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr)
{
  return(20);
}

RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr)
{
  return(10);
}

/* return the region ID for a given RAID address */
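/* Parity addresses are assigned to regions in contiguous ranges of
   regionParityRange sectors: addresses in [i * regionParityRange,
   (i + 1) * regionParityRange) belong to region i, except that the last
   region also absorbs any remainder at the end of the disk. */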
RF_RegionId_t rf_MapRegionIDParityLogging(
  RF_Raid_t        *raidPtr,
  RF_SectorNum_t    address)
{
  RF_RegionId_t regionID;

  /* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */
  regionID = address / raidPtr->regionParityRange;
  if (regionID == rf_numParityRegions)
  {
    /* last region may be larger than other regions */
    regionID--;
  }
  RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr);
  RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
  RF_ASSERT(regionID < rf_numParityRegions);
  return(regionID);
}


/* given a logical RAID sector, determine physical disk address of data */
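/* The mapping routines below assume the column layout established by
   rf_ConfigureParityLogging above: data occupies columns 0 .. numDataCol-1,
   parity occupies column numDataCol (equal to numCol - 2, since there is one
   parity column and one parity log column), and the parity log occupies the
   last column, numCol - 1. */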
void rf_MapSectorParityLogging(
  RF_Raid_t        *raidPtr,
  RF_RaidAddr_t     raidSector,
  RF_RowCol_t      *row,
  RF_RowCol_t      *col,
  RF_SectorNum_t   *diskSector,
  int               remap)
{
  RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
  *row = 0;
  /* *col = (SUID % (raidPtr->numCol - raidPtr->Layout.numParityLogCol)); */
  *col = SUID % raidPtr->Layout.numDataCol;
  *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
    (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
}


/* given a logical RAID sector, determine physical disk address of parity */
void rf_MapParityParityLogging(
  RF_Raid_t        *raidPtr,
  RF_RaidAddr_t     raidSector,
  RF_RowCol_t      *row,
  RF_RowCol_t      *col,
  RF_SectorNum_t   *diskSector,
  int               remap)
{
  RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;

  *row = 0;
  /* *col = raidPtr->Layout.numDataCol - (SUID / raidPtr->Layout.numDataCol) % (raidPtr->numCol - raidPtr->Layout.numParityLogCol); */
  *col = raidPtr->Layout.numDataCol;
  *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
    (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
}


/* given a regionID and sector offset, determine the physical disk address
   of the parity log */
void rf_MapLogParityLogging(
  RF_Raid_t        *raidPtr,
  RF_RegionId_t     regionID,
  RF_SectorNum_t    regionOffset,
  RF_RowCol_t      *row,
  RF_RowCol_t      *col,
  RF_SectorNum_t   *startSector)
{
  *row = 0;
  *col = raidPtr->numCol - 1;
  *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset;
}


/* given a regionID, determine the physical disk address of the logged
   parity for that region */
void rf_MapRegionParity(
  RF_Raid_t        *raidPtr,
  RF_RegionId_t     regionID,
  RF_RowCol_t      *row,
  RF_RowCol_t      *col,
  RF_SectorNum_t   *startSector,
  RF_SectorCount_t *numSector)
{
  *row = 0;
  *col = raidPtr->numCol - 2;
  *startSector = raidPtr->regionInfo[regionID].parityStartAddr;
  *numSector = raidPtr->regionInfo[regionID].numSectorsParity;
}


/* given a logical RAID address, determine the participating disks in the
   stripe */
void rf_IdentifyStripeParityLogging(
  RF_Raid_t        *raidPtr,
  RF_RaidAddr_t     addr,
  RF_RowCol_t     **diskids,
  RF_RowCol_t      *outRow)
{
  RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
  RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  *outRow = 0;
  *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
}


void rf_MapSIDToPSIDParityLogging(
  RF_RaidLayout_t    *layoutPtr,
  RF_StripeNum_t      stripeID,
  RF_StripeNum_t     *psID,
  RF_ReconUnitNum_t  *which_ru)
{
  *which_ru = 0;
  *psID = stripeID;
}

/* select an algorithm for performing an access. Returns (through
 * createFunc) a pointer to a function that will create the DAG for the
 * access; a NULL createFunc indicates that the access cannot be handled.
 */
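/* Selection summary: fault-free reads get the fault-free read DAG and
   degraded reads get the RAID-5 degraded read DAG; fault-free writes get
   the parity logging small- or large-write DAG, while writes involving a
   failed unit fall back to the non-redundant or degraded write DAGs. */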
void rf_ParityLoggingDagSelect(
  RF_Raid_t             *raidPtr,
  RF_IoType_t            type,
  RF_AccessStripeMap_t  *asmp,
  RF_VoidFuncPtr        *createFunc)
{
  RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  RF_PhysDiskAddr_t *failedPDA = NULL;
  RF_RowCol_t frow, fcol;
  RF_RowStatus_t rstat;
  int prior_recon;
  int tid;

  RF_ASSERT(RF_IO_IS_R_OR_W(type));

  if (asmp->numDataFailed + asmp->numParityFailed > 1) {
    RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
    /* *infoFunc = */ *createFunc = NULL;
    return;
  } else if (asmp->numDataFailed + asmp->numParityFailed == 1) {

    /* if under recon & already reconstructed, redirect the access to the
     * spare drive and eliminate the failure indication
     */
    failedPDA = asmp->failedPDAs[0];
    frow = failedPDA->row;
    fcol = failedPDA->col;
    rstat = raidPtr->status[failedPDA->row];
    prior_recon = (rstat == rf_rs_reconfigured) || (
      (rstat == rf_rs_reconstructing) ?
      rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
      );
    if (prior_recon) {
      RF_RowCol_t or = failedPDA->row, oc = failedPDA->col;
      RF_SectorNum_t oo = failedPDA->startSector;
      if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist spare space */

        if (failedPDA == asmp->parityInfo) {

          /* parity has failed */
          (layoutPtr->map->MapParity)(raidPtr, failedPDA->raidAddress, &failedPDA->row,
            &failedPDA->col, &failedPDA->startSector, RF_REMAP);

          if (asmp->parityInfo->next) { /* redir 2nd component, if any */
            RF_PhysDiskAddr_t *p = asmp->parityInfo->next;
            RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit;
            p->row = failedPDA->row;
            p->col = failedPDA->col;
            p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) +
              SUoffs; /* cheating: startSector is not really a RAID address */
          }

        } else if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) {
          RF_ASSERT(0); /* should not ever happen */
        } else {

          /* data has failed */
          (layoutPtr->map->MapSector)(raidPtr, failedPDA->raidAddress, &failedPDA->row,
            &failedPDA->col, &failedPDA->startSector, RF_REMAP);

        }

      } else { /* redirect to dedicated spare space */

        failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
        failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;

        /* the parity may have two distinct components, both of which may
           need to be redirected */
        if (asmp->parityInfo->next) {
          if (failedPDA == asmp->parityInfo) {
            failedPDA->next->row = failedPDA->row;
            failedPDA->next->col = failedPDA->col;
          } else if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */
            asmp->parityInfo->row = failedPDA->row;
            asmp->parityInfo->col = failedPDA->col;
          }
        }
      }

      RF_ASSERT(failedPDA->col != -1);

      if (rf_dagDebug || rf_mapDebug) {
        rf_get_threadid(tid);
        printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
          tid, type, or, oc, (long) oo, failedPDA->row, failedPDA->col, (long) failedPDA->startSector);
      }

      asmp->numDataFailed = asmp->numParityFailed = 0;
    }

  }

  if (type == RF_IO_TYPE_READ) {

    if (asmp->numDataFailed == 0)
      *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;
    else
      *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG;

  }
  else {

    /* if mirroring, always use large writes. If the access requires two
     * distinct parity updates, always do a small write. If the stripe
     * contains a failure but the access does not, do a small write.
     * The first conditional (numStripeUnitsAccessed <= numDataCol/2) uses a
     * less-than-or-equal rather than just a less-than because when G is 3
     * or 4, numDataCol/2 is 1, and I want single-stripe-unit updates to use
     * just one disk.
     */
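    /* For example, with numDataCol = 4 an access touching one or two stripe
       units takes the small-write path, while an access touching three or
       more (absent the other small-write conditions) takes the large-write
       path. */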
    if ((asmp->numDataFailed + asmp->numParityFailed) == 0) {
      if (((asmp->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
          (asmp->parityInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmp)) {
        *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG;
      }
      else
        *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG;
    }
    else
      if (asmp->numParityFailed == 1)
        *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG;
      else
        if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)
          *createFunc = NULL;
        else
          *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG;
  }
}

#endif /* RF_INCLUDE_PARITYLOGGING > 0 */