1 1.35 christos /* $NetBSD: rf_paritylogging.c,v 1.35 2019/02/09 03:34:00 christos Exp $ */ 2 1.1 oster /* 3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University. 4 1.1 oster * All rights reserved. 5 1.1 oster * 6 1.1 oster * Author: William V. Courtright II 7 1.1 oster * 8 1.1 oster * Permission to use, copy, modify and distribute this software and 9 1.1 oster * its documentation is hereby granted, provided that both the copyright 10 1.1 oster * notice and this permission notice appear in all copies of the 11 1.1 oster * software, derivative works or modified versions, and any portions 12 1.1 oster * thereof, and that both notices appear in supporting documentation. 13 1.1 oster * 14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 1.1 oster * 18 1.1 oster * Carnegie Mellon requests users of this software to return to 19 1.1 oster * 20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 21 1.1 oster * School of Computer Science 22 1.1 oster * Carnegie Mellon University 23 1.1 oster * Pittsburgh PA 15213-3890 24 1.1 oster * 25 1.1 oster * any improvements or extensions that they make and grant Carnegie the 26 1.1 oster * rights to redistribute these changes. 27 1.1 oster */ 28 1.1 oster 29 1.1 oster 30 1.1 oster /* 31 1.1 oster parity logging configuration, dag selection, and mapping is implemented here 32 1.1 oster */ 33 1.12 lukem 34 1.12 lukem #include <sys/cdefs.h> 35 1.35 christos __KERNEL_RCSID(0, "$NetBSD: rf_paritylogging.c,v 1.35 2019/02/09 03:34:00 christos Exp $"); 36 1.1 oster 37 1.1 oster #include "rf_archs.h" 38 1.1 oster 39 1.1 oster #if RF_INCLUDE_PARITYLOGGING > 0 40 1.1 oster 41 1.11 oster #include <dev/raidframe/raidframevar.h> 42 1.11 oster 43 1.1 oster #include "rf_raid.h" 44 1.1 oster #include "rf_dag.h" 45 1.1 oster #include "rf_dagutils.h" 46 1.1 oster #include "rf_dagfuncs.h" 47 1.1 oster #include "rf_dagffrd.h" 48 1.1 oster #include "rf_dagffwr.h" 49 1.1 oster #include "rf_dagdegrd.h" 50 1.1 oster #include "rf_dagdegwr.h" 51 1.1 oster #include "rf_paritylog.h" 52 1.1 oster #include "rf_paritylogDiskMgr.h" 53 1.1 oster #include "rf_paritylogging.h" 54 1.1 oster #include "rf_parityloggingdags.h" 55 1.1 oster #include "rf_general.h" 56 1.1 oster #include "rf_map.h" 57 1.1 oster #include "rf_utils.h" 58 1.1 oster #include "rf_shutdown.h" 59 1.1 oster 60 1.1 oster typedef struct RF_ParityLoggingConfigInfo_s { 61 1.3 oster RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by 62 1.3 oster * IdentifyStripe */ 63 1.3 oster } RF_ParityLoggingConfigInfo_t; 64 1.1 oster 65 1.3 oster static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID); 66 1.1 oster static void rf_ShutdownParityLogging(RF_ThreadArg_t arg); 67 1.1 oster static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg); 68 1.1 oster static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg); 69 1.1 oster static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg); 70 1.1 oster static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg); 71 1.1 oster static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg); 72 1.1 oster 73 1.23 perry int 74 1.3 oster rf_ConfigureParityLogging( 75 1.3 oster RF_ShutdownList_t ** listp, 76 1.3 oster RF_Raid_t * raidPtr, 77 1.27 christos RF_Config_t * cfgPtr) 78 1.3 oster { 79 1.3 oster int i, j, startdisk, rc; 80 1.3 oster RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; 81 1.3 oster RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; 82 1.3 oster RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 83 1.3 oster RF_ParityLoggingConfigInfo_t *info; 84 1.3 oster RF_ParityLog_t *l = NULL, *next; 85 1.28 christos void *lHeapPtr; 86 1.3 oster 87 1.5 oster if (rf_numParityRegions <= 0) 88 1.5 oster return(EINVAL); 89 1.5 oster 90 1.3 oster /* 91 1.3 oster * We create multiple entries on the shutdown list here, since 92 1.3 oster * this configuration routine is fairly complicated in and of 93 1.3 oster * itself, and this makes backing out of a failed configuration 94 1.3 oster * much simpler. 95 1.3 oster */ 96 1.3 oster 97 1.3 oster raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; 98 1.3 oster 99 1.3 oster /* create a parity logging configuration structure */ 100 1.35 christos info = RF_MallocAndAdd(sizeof(*info), raidPtr->cleanupList); 101 1.3 oster if (info == NULL) 102 1.3 oster return (ENOMEM); 103 1.3 oster layoutPtr->layoutSpecificInfo = (void *) info; 104 1.3 oster 105 1.3 oster /* the stripe identifier must identify the disks in each stripe, IN 106 1.3 oster * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ 107 1.23 perry info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), 108 1.23 perry (raidPtr->numCol), 109 1.8 oster raidPtr->cleanupList); 110 1.3 oster if (info->stripeIdentifier == NULL) 111 1.3 oster return (ENOMEM); 112 1.3 oster 113 1.3 oster startdisk = 0; 114 1.3 oster for (i = 0; i < (raidPtr->numCol); i++) { 115 1.3 oster for (j = 0; j < (raidPtr->numCol); j++) { 116 1.23 perry info->stripeIdentifier[i][j] = (startdisk + j) % 117 1.8 oster (raidPtr->numCol - 1); 118 1.3 oster } 119 1.3 oster if ((--startdisk) < 0) 120 1.3 oster startdisk = raidPtr->numCol - 1 - 1; 121 1.3 oster } 122 1.3 oster 123 1.3 oster /* fill in the remaining layout parameters */ 124 1.3 oster layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; 125 1.3 oster layoutPtr->numParityCol = 1; 126 1.3 oster layoutPtr->numParityLogCol = 1; 127 1.23 perry layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - 128 1.8 oster layoutPtr->numParityLogCol; 129 1.23 perry layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * 130 1.8 oster layoutPtr->sectorsPerStripeUnit; 131 1.3 oster layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; 132 1.23 perry raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * 133 1.8 oster layoutPtr->sectorsPerStripeUnit; 134 1.3 oster 135 1.23 perry raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * 136 1.8 oster layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; 137 1.3 oster 138 1.3 oster /* configure parity log parameters 139 1.23 perry * 140 1.23 perry * parameter comment/constraints 141 1.23 perry * ------------------------------------------- 142 1.23 perry * numParityRegions* all regions (except possibly last) 143 1.23 perry * of equal size 144 1.23 perry * totalInCoreLogCapacity* amount of memory in bytes available 145 1.23 perry * for in-core logs (default 1 MB) 146 1.23 perry * numSectorsPerLog# capacity of an in-core log in sectors 147 1.5 oster * (1 * disk track) 148 1.5 oster * numParityLogs total number of in-core logs, 149 1.23 perry * should be at least numParityRegions 150 1.23 perry * regionLogCapacity size of a region log (except possibly 151 1.23 perry * last one) in sectors 152 1.5 oster * totalLogCapacity total amount of log space in sectors 153 1.23 perry * 154 1.23 perry * where '*' denotes a user settable parameter. 155 1.23 perry * Note that logs are fixed to be the size of a disk track, 156 1.5 oster * value #defined in rf_paritylog.h 157 1.23 perry * 158 1.3 oster */ 159 1.3 oster 160 1.3 oster totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; 161 1.3 oster raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; 162 1.3 oster if (rf_parityLogDebug) 163 1.3 oster printf("bytes per sector %d\n", raidPtr->bytesPerSector); 164 1.3 oster 165 1.3 oster /* reduce fragmentation within a disk region by adjusting the number 166 1.3 oster * of regions in an attempt to allow an integral number of logs to fit 167 1.3 oster * into a disk region */ 168 1.3 oster fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; 169 1.3 oster if (fragmentation > 0) 170 1.3 oster for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) { 171 1.23 perry if (((totalLogCapacity / (rf_numParityRegions + i)) % 172 1.8 oster raidPtr->numSectorsPerLog) < fragmentation) { 173 1.3 oster rf_numParityRegions++; 174 1.8 oster raidPtr->regionLogCapacity = totalLogCapacity / 175 1.8 oster rf_numParityRegions; 176 1.23 perry fragmentation = raidPtr->regionLogCapacity % 177 1.8 oster raidPtr->numSectorsPerLog; 178 1.3 oster } 179 1.23 perry if (((totalLogCapacity / (rf_numParityRegions - i)) % 180 1.8 oster raidPtr->numSectorsPerLog) < fragmentation) { 181 1.3 oster rf_numParityRegions--; 182 1.8 oster raidPtr->regionLogCapacity = totalLogCapacity / 183 1.8 oster rf_numParityRegions; 184 1.23 perry fragmentation = raidPtr->regionLogCapacity % 185 1.8 oster raidPtr->numSectorsPerLog; 186 1.3 oster } 187 1.3 oster } 188 1.3 oster /* ensure integral number of regions per log */ 189 1.23 perry raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / 190 1.23 perry raidPtr->numSectorsPerLog) * 191 1.8 oster raidPtr->numSectorsPerLog; 192 1.3 oster 193 1.23 perry raidPtr->numParityLogs = rf_totalInCoreLogCapacity / 194 1.8 oster (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); 195 1.3 oster /* to avoid deadlock, must ensure that enough logs exist for each 196 1.3 oster * region to have one simultaneously */ 197 1.3 oster if (raidPtr->numParityLogs < rf_numParityRegions) 198 1.3 oster raidPtr->numParityLogs = rf_numParityRegions; 199 1.3 oster 200 1.3 oster /* create region information structs */ 201 1.9 oster printf("Allocating %d bytes for in-core parity region info\n", 202 1.10 oster (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t))); 203 1.35 christos raidPtr->regionInfo = RF_Malloc( 204 1.35 christos rf_numParityRegions * sizeof(*raidPtr->regionInfo)); 205 1.3 oster if (raidPtr->regionInfo == NULL) 206 1.3 oster return (ENOMEM); 207 1.3 oster 208 1.3 oster /* last region may not be full capacity */ 209 1.3 oster lastRegionCapacity = raidPtr->regionLogCapacity; 210 1.23 perry while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + 211 1.8 oster lastRegionCapacity > totalLogCapacity) 212 1.23 perry lastRegionCapacity = lastRegionCapacity - 213 1.8 oster raidPtr->numSectorsPerLog; 214 1.1 oster 215 1.23 perry raidPtr->regionParityRange = raidPtr->sectorsPerDisk / 216 1.8 oster rf_numParityRegions; 217 1.3 oster maxRegionParityRange = raidPtr->regionParityRange; 218 1.1 oster 219 1.1 oster /* i can't remember why this line is in the code -wvcii 6/30/95 */ 220 1.1 oster /* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) 221 1.1 oster regionParityRange++; */ 222 1.1 oster 223 1.3 oster /* build pool of unused parity logs */ 224 1.9 oster printf("Allocating %d bytes for %d parity logs\n", 225 1.23 perry raidPtr->numParityLogs * raidPtr->numSectorsPerLog * 226 1.9 oster raidPtr->bytesPerSector, 227 1.9 oster raidPtr->numParityLogs); 228 1.35 christos raidPtr->parityLogBufferHeap = RF_Malloc(raidPtr->numParityLogs 229 1.35 christos * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); 230 1.3 oster if (raidPtr->parityLogBufferHeap == NULL) 231 1.3 oster return (ENOMEM); 232 1.3 oster lHeapPtr = raidPtr->parityLogBufferHeap; 233 1.33 mrg rf_init_mutex2(raidPtr->parityLogPool.mutex, IPL_VM); 234 1.3 oster for (i = 0; i < raidPtr->numParityLogs; i++) { 235 1.3 oster if (i == 0) { 236 1.35 christos raidPtr->parityLogPool.parityLogs = 237 1.35 christos RF_Malloc( 238 1.35 christos sizeof(*raidPtr->parityLogPool.parityLogs)); 239 1.3 oster if (raidPtr->parityLogPool.parityLogs == NULL) { 240 1.23 perry RF_Free(raidPtr->parityLogBufferHeap, 241 1.23 perry raidPtr->numParityLogs * 242 1.23 perry raidPtr->numSectorsPerLog * 243 1.8 oster raidPtr->bytesPerSector); 244 1.3 oster return (ENOMEM); 245 1.3 oster } 246 1.3 oster l = raidPtr->parityLogPool.parityLogs; 247 1.3 oster } else { 248 1.35 christos l->next = RF_Malloc(sizeof(*l->next)); 249 1.3 oster if (l->next == NULL) { 250 1.23 perry RF_Free(raidPtr->parityLogBufferHeap, 251 1.23 perry raidPtr->numParityLogs * 252 1.23 perry raidPtr->numSectorsPerLog * 253 1.8 oster raidPtr->bytesPerSector); 254 1.23 perry for (l = raidPtr->parityLogPool.parityLogs; 255 1.8 oster l; 256 1.8 oster l = next) { 257 1.3 oster next = l->next; 258 1.3 oster if (l->records) 259 1.3 oster RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); 260 1.3 oster RF_Free(l, sizeof(RF_ParityLog_t)); 261 1.3 oster } 262 1.3 oster return (ENOMEM); 263 1.3 oster } 264 1.3 oster l = l->next; 265 1.3 oster } 266 1.3 oster l->bufPtr = lHeapPtr; 267 1.28 christos lHeapPtr = (char *)lHeapPtr + raidPtr->numSectorsPerLog * 268 1.8 oster raidPtr->bytesPerSector; 269 1.35 christos l->records = RF_Malloc(raidPtr->numSectorsPerLog * 270 1.35 christos sizeof(*l->records)); 271 1.3 oster if (l->records == NULL) { 272 1.23 perry RF_Free(raidPtr->parityLogBufferHeap, 273 1.23 perry raidPtr->numParityLogs * 274 1.23 perry raidPtr->numSectorsPerLog * 275 1.8 oster raidPtr->bytesPerSector); 276 1.23 perry for (l = raidPtr->parityLogPool.parityLogs; 277 1.23 perry l; 278 1.8 oster l = next) { 279 1.3 oster next = l->next; 280 1.3 oster if (l->records) 281 1.23 perry RF_Free(l->records, 282 1.23 perry (raidPtr->numSectorsPerLog * 283 1.8 oster sizeof(RF_ParityLogRecord_t))); 284 1.3 oster RF_Free(l, sizeof(RF_ParityLog_t)); 285 1.3 oster } 286 1.3 oster return (ENOMEM); 287 1.3 oster } 288 1.3 oster } 289 1.22 oster rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); 290 1.3 oster /* build pool of region buffers */ 291 1.34 mrg rf_init_mutex2(raidPtr->regionBufferPool.mutex, IPL_VM); 292 1.34 mrg rf_init_cond2(raidPtr->regionBufferPool.cond, "rfrbpl"); 293 1.23 perry raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * 294 1.8 oster raidPtr->bytesPerSector; 295 1.23 perry printf("regionBufferPool.bufferSize %d\n", 296 1.8 oster raidPtr->regionBufferPool.bufferSize); 297 1.8 oster 298 1.8 oster /* for now, only one region at a time may be reintegrated */ 299 1.23 perry raidPtr->regionBufferPool.totalBuffers = 1; 300 1.8 oster 301 1.23 perry raidPtr->regionBufferPool.availableBuffers = 302 1.8 oster raidPtr->regionBufferPool.totalBuffers; 303 1.3 oster raidPtr->regionBufferPool.availBuffersIndex = 0; 304 1.3 oster raidPtr->regionBufferPool.emptyBuffersIndex = 0; 305 1.9 oster printf("Allocating %d bytes for regionBufferPool\n", 306 1.23 perry (int) (raidPtr->regionBufferPool.totalBuffers * 307 1.28 christos sizeof(void *))); 308 1.35 christos raidPtr->regionBufferPool.buffers = RF_Malloc( 309 1.35 christos raidPtr->regionBufferPool.totalBuffers * 310 1.35 christos sizeof(*raidPtr->regionBufferPool.buffers)); 311 1.3 oster if (raidPtr->regionBufferPool.buffers == NULL) { 312 1.3 oster return (ENOMEM); 313 1.3 oster } 314 1.3 oster for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { 315 1.9 oster printf("Allocating %d bytes for regionBufferPool#%d\n", 316 1.23 perry (int) (raidPtr->regionBufferPool.bufferSize * 317 1.10 oster sizeof(char)), i); 318 1.35 christos raidPtr->regionBufferPool.buffers[i] = 319 1.35 christos RF_Malloc(raidPtr->regionBufferPool.bufferSize); 320 1.7 oster if (raidPtr->regionBufferPool.buffers[i] == NULL) { 321 1.3 oster for (j = 0; j < i; j++) { 322 1.23 perry RF_Free(raidPtr->regionBufferPool.buffers[i], 323 1.8 oster raidPtr->regionBufferPool.bufferSize * 324 1.8 oster sizeof(char)); 325 1.3 oster } 326 1.23 perry RF_Free(raidPtr->regionBufferPool.buffers, 327 1.23 perry raidPtr->regionBufferPool.totalBuffers * 328 1.28 christos sizeof(void *)); 329 1.3 oster return (ENOMEM); 330 1.3 oster } 331 1.3 oster printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i, 332 1.3 oster (long) raidPtr->regionBufferPool.buffers[i]); 333 1.3 oster } 334 1.23 perry rf_ShutdownCreate(listp, 335 1.22 oster rf_ShutdownParityLoggingRegionBufferPool, 336 1.22 oster raidPtr); 337 1.3 oster /* build pool of parity buffers */ 338 1.3 oster parityBufferCapacity = maxRegionParityRange; 339 1.34 mrg rf_init_mutex2(raidPtr->parityBufferPool.mutex, IPL_VM); 340 1.34 mrg rf_init_cond2(raidPtr->parityBufferPool.cond, "rfpbpl"); 341 1.23 perry raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * 342 1.8 oster raidPtr->bytesPerSector; 343 1.23 perry printf("parityBufferPool.bufferSize %d\n", 344 1.8 oster raidPtr->parityBufferPool.bufferSize); 345 1.8 oster 346 1.8 oster /* for now, only one region at a time may be reintegrated */ 347 1.23 perry raidPtr->parityBufferPool.totalBuffers = 1; 348 1.8 oster 349 1.23 perry raidPtr->parityBufferPool.availableBuffers = 350 1.8 oster raidPtr->parityBufferPool.totalBuffers; 351 1.3 oster raidPtr->parityBufferPool.availBuffersIndex = 0; 352 1.3 oster raidPtr->parityBufferPool.emptyBuffersIndex = 0; 353 1.9 oster printf("Allocating %d bytes for parityBufferPool of %d units\n", 354 1.23 perry (int) (raidPtr->parityBufferPool.totalBuffers * 355 1.28 christos sizeof(void *)), 356 1.9 oster raidPtr->parityBufferPool.totalBuffers ); 357 1.35 christos raidPtr->parityBufferPool.buffers = RF_Malloc( 358 1.35 christos raidPtr->parityBufferPool.totalBuffers * 359 1.35 christos sizeof(*raidPtr->parityBufferPool.buffers)); 360 1.3 oster if (raidPtr->parityBufferPool.buffers == NULL) { 361 1.3 oster return (ENOMEM); 362 1.3 oster } 363 1.3 oster for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { 364 1.9 oster printf("Allocating %d bytes for parityBufferPool#%d\n", 365 1.23 perry (int) (raidPtr->parityBufferPool.bufferSize * 366 1.10 oster sizeof(char)),i); 367 1.35 christos raidPtr->parityBufferPool.buffers[i] = RF_Malloc( 368 1.35 christos raidPtr->parityBufferPool.bufferSize); 369 1.3 oster if (raidPtr->parityBufferPool.buffers == NULL) { 370 1.3 oster for (j = 0; j < i; j++) { 371 1.23 perry RF_Free(raidPtr->parityBufferPool.buffers[i], 372 1.23 perry raidPtr->regionBufferPool.bufferSize * 373 1.8 oster sizeof(char)); 374 1.3 oster } 375 1.23 perry RF_Free(raidPtr->parityBufferPool.buffers, 376 1.23 perry raidPtr->regionBufferPool.totalBuffers * 377 1.28 christos sizeof(void *)); 378 1.3 oster return (ENOMEM); 379 1.3 oster } 380 1.3 oster printf("parityBufferPool.buffers[%d] = %lx\n", i, 381 1.3 oster (long) raidPtr->parityBufferPool.buffers[i]); 382 1.3 oster } 383 1.23 perry rf_ShutdownCreate(listp, 384 1.23 perry rf_ShutdownParityLoggingParityBufferPool, 385 1.22 oster raidPtr); 386 1.3 oster /* initialize parityLogDiskQueue */ 387 1.30 mrg rf_init_mutex2(raidPtr->parityLogDiskQueue.mutex, IPL_VM); 388 1.34 mrg rf_init_cond2(raidPtr->parityLogDiskQueue.cond, "rfpldq"); 389 1.3 oster raidPtr->parityLogDiskQueue.flushQueue = NULL; 390 1.3 oster raidPtr->parityLogDiskQueue.reintQueue = NULL; 391 1.3 oster raidPtr->parityLogDiskQueue.bufHead = NULL; 392 1.3 oster raidPtr->parityLogDiskQueue.bufTail = NULL; 393 1.3 oster raidPtr->parityLogDiskQueue.reintHead = NULL; 394 1.3 oster raidPtr->parityLogDiskQueue.reintTail = NULL; 395 1.3 oster raidPtr->parityLogDiskQueue.logBlockHead = NULL; 396 1.3 oster raidPtr->parityLogDiskQueue.logBlockTail = NULL; 397 1.3 oster raidPtr->parityLogDiskQueue.reintBlockHead = NULL; 398 1.3 oster raidPtr->parityLogDiskQueue.reintBlockTail = NULL; 399 1.3 oster raidPtr->parityLogDiskQueue.freeDataList = NULL; 400 1.3 oster raidPtr->parityLogDiskQueue.freeCommonList = NULL; 401 1.3 oster 402 1.23 perry rf_ShutdownCreate(listp, 403 1.23 perry rf_ShutdownParityLoggingDiskQueue, 404 1.22 oster raidPtr); 405 1.3 oster for (i = 0; i < rf_numParityRegions; i++) { 406 1.32 mrg rf_init_mutex2(raidPtr->regionInfo[i].mutex, IPL_VM); 407 1.31 mrg rf_init_mutex2(raidPtr->regionInfo[i].reintMutex, IPL_VM); 408 1.3 oster raidPtr->regionInfo[i].reintInProgress = RF_FALSE; 409 1.23 perry raidPtr->regionInfo[i].regionStartAddr = 410 1.8 oster raidPtr->regionLogCapacity * i; 411 1.23 perry raidPtr->regionInfo[i].parityStartAddr = 412 1.8 oster raidPtr->regionParityRange * i; 413 1.3 oster if (i < rf_numParityRegions - 1) { 414 1.23 perry raidPtr->regionInfo[i].capacity = 415 1.8 oster raidPtr->regionLogCapacity; 416 1.23 perry raidPtr->regionInfo[i].numSectorsParity = 417 1.8 oster raidPtr->regionParityRange; 418 1.3 oster } else { 419 1.23 perry raidPtr->regionInfo[i].capacity = 420 1.8 oster lastRegionCapacity; 421 1.23 perry raidPtr->regionInfo[i].numSectorsParity = 422 1.23 perry raidPtr->sectorsPerDisk - 423 1.8 oster raidPtr->regionParityRange * i; 424 1.23 perry if (raidPtr->regionInfo[i].numSectorsParity > 425 1.8 oster maxRegionParityRange) 426 1.23 perry maxRegionParityRange = 427 1.8 oster raidPtr->regionInfo[i].numSectorsParity; 428 1.3 oster } 429 1.3 oster raidPtr->regionInfo[i].diskCount = 0; 430 1.23 perry RF_ASSERT(raidPtr->regionInfo[i].capacity + 431 1.23 perry raidPtr->regionInfo[i].regionStartAddr <= 432 1.8 oster totalLogCapacity); 433 1.23 perry RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + 434 1.23 perry raidPtr->regionInfo[i].numSectorsParity <= 435 1.8 oster raidPtr->sectorsPerDisk); 436 1.9 oster printf("Allocating %d bytes for region %d\n", 437 1.9 oster (int) (raidPtr->regionInfo[i].capacity * 438 1.9 oster sizeof(RF_DiskMap_t)), i); 439 1.35 christos raidPtr->regionInfo[i].diskMap = RF_Malloc( 440 1.35 christos raidPtr->regionInfo[i].capacity * 441 1.35 christos sizeof(*raidPtr->regionInfo[i].diskMap)); 442 1.3 oster if (raidPtr->regionInfo[i].diskMap == NULL) { 443 1.3 oster for (j = 0; j < i; j++) 444 1.3 oster FreeRegionInfo(raidPtr, j); 445 1.23 perry RF_Free(raidPtr->regionInfo, 446 1.23 perry (rf_numParityRegions * 447 1.8 oster sizeof(RF_RegionInfo_t))); 448 1.3 oster return (ENOMEM); 449 1.3 oster } 450 1.3 oster raidPtr->regionInfo[i].loggingEnabled = RF_FALSE; 451 1.3 oster raidPtr->regionInfo[i].coreLog = NULL; 452 1.3 oster } 453 1.22 oster rf_ShutdownCreate(listp, 454 1.23 perry rf_ShutdownParityLoggingRegionInfo, 455 1.22 oster raidPtr); 456 1.3 oster RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); 457 1.3 oster raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; 458 1.23 perry rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, 459 1.8 oster rf_ParityLoggingDiskManager, raidPtr,"rf_log"); 460 1.3 oster if (rc) { 461 1.3 oster raidPtr->parityLogDiskQueue.threadState = 0; 462 1.3 oster RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n", 463 1.3 oster __FILE__, __LINE__, rc); 464 1.3 oster return (ENOMEM); 465 1.3 oster } 466 1.3 oster /* wait for thread to start */ 467 1.30 mrg rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 468 1.3 oster while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) { 469 1.30 mrg rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, 470 1.30 mrg raidPtr->parityLogDiskQueue.mutex); 471 1.3 oster } 472 1.30 mrg rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 473 1.3 oster 474 1.22 oster rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); 475 1.3 oster if (rf_parityLogDebug) { 476 1.3 oster printf(" size of disk log in sectors: %d\n", 477 1.3 oster (int) totalLogCapacity); 478 1.3 oster printf(" total number of parity regions is %d\n", (int) rf_numParityRegions); 479 1.3 oster printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity); 480 1.3 oster printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation); 481 1.3 oster printf(" total number of parity logs is %d\n", raidPtr->numParityLogs); 482 1.3 oster printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog); 483 1.3 oster printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity); 484 1.3 oster } 485 1.3 oster rf_EnableParityLogging(raidPtr); 486 1.3 oster 487 1.3 oster return (0); 488 1.1 oster } 489 1.1 oster 490 1.23 perry static void 491 1.3 oster FreeRegionInfo( 492 1.3 oster RF_Raid_t * raidPtr, 493 1.3 oster RF_RegionId_t regionID) 494 1.3 oster { 495 1.23 perry RF_Free(raidPtr->regionInfo[regionID].diskMap, 496 1.23 perry (raidPtr->regionInfo[regionID].capacity * 497 1.8 oster sizeof(RF_DiskMap_t))); 498 1.3 oster if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { 499 1.23 perry rf_ReleaseParityLogs(raidPtr, 500 1.8 oster raidPtr->regionInfo[regionID].coreLog); 501 1.3 oster raidPtr->regionInfo[regionID].coreLog = NULL; 502 1.3 oster } else { 503 1.3 oster RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL); 504 1.3 oster RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0); 505 1.3 oster } 506 1.31 mrg rf_destroy_mutex2(raidPtr->regionInfo[regionID].reintMutex); 507 1.32 mrg rf_destroy_mutex2(raidPtr->regionInfo[regionID].mutex); 508 1.3 oster } 509 1.3 oster 510 1.3 oster 511 1.23 perry static void 512 1.33 mrg FreeParityLogQueue(RF_Raid_t * raidPtr) 513 1.3 oster { 514 1.3 oster RF_ParityLog_t *l1, *l2; 515 1.3 oster 516 1.33 mrg l1 = raidPtr->parityLogPool.parityLogs; 517 1.3 oster while (l1) { 518 1.3 oster l2 = l1; 519 1.3 oster l1 = l2->next; 520 1.23 perry RF_Free(l2->records, (raidPtr->numSectorsPerLog * 521 1.8 oster sizeof(RF_ParityLogRecord_t))); 522 1.3 oster RF_Free(l2, sizeof(RF_ParityLog_t)); 523 1.3 oster } 524 1.33 mrg rf_destroy_mutex2(raidPtr->parityLogPool.mutex); 525 1.3 oster } 526 1.3 oster 527 1.3 oster 528 1.23 perry static void 529 1.3 oster FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) 530 1.1 oster { 531 1.3 oster int i; 532 1.3 oster 533 1.3 oster if (queue->availableBuffers != queue->totalBuffers) { 534 1.3 oster printf("Attempt to free region queue which is still in use!\n"); 535 1.3 oster RF_ASSERT(0); 536 1.3 oster } 537 1.3 oster for (i = 0; i < queue->totalBuffers; i++) 538 1.3 oster RF_Free(queue->buffers[i], queue->bufferSize); 539 1.28 christos RF_Free(queue->buffers, queue->totalBuffers * sizeof(void *)); 540 1.34 mrg rf_destroy_mutex2(queue->mutex); 541 1.34 mrg rf_destroy_cond2(queue->cond); 542 1.3 oster } 543 1.3 oster 544 1.23 perry static void 545 1.3 oster rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) 546 1.3 oster { 547 1.3 oster RF_Raid_t *raidPtr; 548 1.3 oster RF_RegionId_t i; 549 1.3 oster 550 1.3 oster raidPtr = (RF_Raid_t *) arg; 551 1.3 oster if (rf_parityLogDebug) { 552 1.23 perry printf("raid%d: ShutdownParityLoggingRegionInfo\n", 553 1.6 oster raidPtr->raidid); 554 1.3 oster } 555 1.3 oster /* free region information structs */ 556 1.3 oster for (i = 0; i < rf_numParityRegions; i++) 557 1.3 oster FreeRegionInfo(raidPtr, i); 558 1.23 perry RF_Free(raidPtr->regionInfo, (rf_numParityRegions * 559 1.8 oster sizeof(raidPtr->regionInfo))); 560 1.3 oster raidPtr->regionInfo = NULL; 561 1.3 oster } 562 1.3 oster 563 1.23 perry static void 564 1.3 oster rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) 565 1.3 oster { 566 1.3 oster RF_Raid_t *raidPtr; 567 1.3 oster 568 1.3 oster raidPtr = (RF_Raid_t *) arg; 569 1.3 oster if (rf_parityLogDebug) { 570 1.6 oster printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid); 571 1.3 oster } 572 1.3 oster /* free contents of parityLogPool */ 573 1.33 mrg FreeParityLogQueue(raidPtr); 574 1.23 perry RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * 575 1.8 oster raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); 576 1.1 oster } 577 1.1 oster 578 1.23 perry static void 579 1.3 oster rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) 580 1.1 oster { 581 1.3 oster RF_Raid_t *raidPtr; 582 1.3 oster 583 1.3 oster raidPtr = (RF_Raid_t *) arg; 584 1.3 oster if (rf_parityLogDebug) { 585 1.23 perry printf("raid%d: ShutdownParityLoggingRegionBufferPool\n", 586 1.6 oster raidPtr->raidid); 587 1.3 oster } 588 1.3 oster FreeRegionBufferQueue(&raidPtr->regionBufferPool); 589 1.1 oster } 590 1.1 oster 591 1.23 perry static void 592 1.3 oster rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) 593 1.3 oster { 594 1.3 oster RF_Raid_t *raidPtr; 595 1.3 oster 596 1.3 oster raidPtr = (RF_Raid_t *) arg; 597 1.3 oster if (rf_parityLogDebug) { 598 1.6 oster printf("raid%d: ShutdownParityLoggingParityBufferPool\n", 599 1.6 oster raidPtr->raidid); 600 1.3 oster } 601 1.3 oster FreeRegionBufferQueue(&raidPtr->parityBufferPool); 602 1.3 oster } 603 1.3 oster 604 1.23 perry static void 605 1.3 oster rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) 606 1.3 oster { 607 1.3 oster RF_ParityLogData_t *d; 608 1.3 oster RF_CommonLogData_t *c; 609 1.3 oster RF_Raid_t *raidPtr; 610 1.3 oster 611 1.3 oster raidPtr = (RF_Raid_t *) arg; 612 1.3 oster if (rf_parityLogDebug) { 613 1.6 oster printf("raid%d: ShutdownParityLoggingDiskQueue\n", 614 1.6 oster raidPtr->raidid); 615 1.3 oster } 616 1.3 oster /* free disk manager stuff */ 617 1.3 oster RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); 618 1.3 oster RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); 619 1.3 oster RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); 620 1.3 oster RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); 621 1.3 oster while (raidPtr->parityLogDiskQueue.freeDataList) { 622 1.3 oster d = raidPtr->parityLogDiskQueue.freeDataList; 623 1.23 perry raidPtr->parityLogDiskQueue.freeDataList = 624 1.8 oster raidPtr->parityLogDiskQueue.freeDataList->next; 625 1.3 oster RF_Free(d, sizeof(RF_ParityLogData_t)); 626 1.3 oster } 627 1.3 oster while (raidPtr->parityLogDiskQueue.freeCommonList) { 628 1.3 oster c = raidPtr->parityLogDiskQueue.freeCommonList; 629 1.29 mrg raidPtr->parityLogDiskQueue.freeCommonList = c->next; 630 1.29 mrg /* init is in rf_paritylog.c */ 631 1.29 mrg rf_destroy_mutex2(c->mutex); 632 1.3 oster RF_Free(c, sizeof(RF_CommonLogData_t)); 633 1.3 oster } 634 1.30 mrg 635 1.30 mrg rf_destroy_mutex2(raidPtr->parityLogDiskQueue.mutex); 636 1.30 mrg rf_destroy_cond2(raidPtr->parityLogDiskQueue.cond); 637 1.3 oster } 638 1.3 oster 639 1.23 perry static void 640 1.3 oster rf_ShutdownParityLogging(RF_ThreadArg_t arg) 641 1.3 oster { 642 1.3 oster RF_Raid_t *raidPtr; 643 1.3 oster 644 1.3 oster raidPtr = (RF_Raid_t *) arg; 645 1.3 oster if (rf_parityLogDebug) { 646 1.6 oster printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid); 647 1.3 oster } 648 1.3 oster /* shutdown disk thread */ 649 1.3 oster /* This has the desirable side-effect of forcing all regions to be 650 1.3 oster * reintegrated. This is necessary since all parity log maps are 651 1.3 oster * currently held in volatile memory. */ 652 1.3 oster 653 1.30 mrg rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 654 1.3 oster raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; 655 1.30 mrg rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 656 1.30 mrg rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 657 1.3 oster /* 658 1.3 oster * pLogDiskThread will now terminate when queues are cleared 659 1.3 oster * now wait for it to be done 660 1.3 oster */ 661 1.30 mrg rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 662 1.3 oster while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) { 663 1.30 mrg rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, 664 1.30 mrg raidPtr->parityLogDiskQueue.mutex); 665 1.3 oster } 666 1.30 mrg rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 667 1.3 oster if (rf_parityLogDebug) { 668 1.6 oster printf("raid%d: ShutdownParityLogging done (thread completed)\n", raidPtr->raidid); 669 1.3 oster } 670 1.3 oster } 671 1.3 oster 672 1.23 perry int 673 1.27 christos rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr) 674 1.3 oster { 675 1.3 oster return (20); 676 1.3 oster } 677 1.3 oster 678 1.23 perry RF_HeadSepLimit_t 679 1.27 christos rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr) 680 1.3 oster { 681 1.3 oster return (10); 682 1.3 oster } 683 1.1 oster /* return the region ID for a given RAID address */ 684 1.23 perry RF_RegionId_t 685 1.3 oster rf_MapRegionIDParityLogging( 686 1.3 oster RF_Raid_t * raidPtr, 687 1.3 oster RF_SectorNum_t address) 688 1.1 oster { 689 1.3 oster RF_RegionId_t regionID; 690 1.1 oster 691 1.1 oster /* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */ 692 1.3 oster regionID = address / raidPtr->regionParityRange; 693 1.3 oster if (regionID == rf_numParityRegions) { 694 1.3 oster /* last region may be larger than other regions */ 695 1.3 oster regionID--; 696 1.3 oster } 697 1.3 oster RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); 698 1.23 perry RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + 699 1.8 oster raidPtr->regionInfo[regionID].numSectorsParity); 700 1.3 oster RF_ASSERT(regionID < rf_numParityRegions); 701 1.3 oster return (regionID); 702 1.1 oster } 703 1.1 oster 704 1.1 oster 705 1.1 oster /* given a logical RAID sector, determine physical disk address of data */ 706 1.23 perry void 707 1.3 oster rf_MapSectorParityLogging( 708 1.3 oster RF_Raid_t * raidPtr, 709 1.3 oster RF_RaidAddr_t raidSector, 710 1.3 oster RF_RowCol_t * col, 711 1.3 oster RF_SectorNum_t * diskSector, 712 1.27 christos int remap) 713 1.3 oster { 714 1.23 perry RF_StripeNum_t SUID = raidSector / 715 1.8 oster raidPtr->Layout.sectorsPerStripeUnit; 716 1.3 oster /* *col = (SUID % (raidPtr->numCol - 717 1.3 oster * raidPtr->Layout.numParityLogCol)); */ 718 1.3 oster *col = SUID % raidPtr->Layout.numDataCol; 719 1.23 perry *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * 720 1.8 oster raidPtr->Layout.sectorsPerStripeUnit + 721 1.8 oster (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 722 1.1 oster } 723 1.1 oster 724 1.1 oster 725 1.1 oster /* given a logical RAID sector, determine physical disk address of parity */ 726 1.23 perry void 727 1.3 oster rf_MapParityParityLogging( 728 1.3 oster RF_Raid_t * raidPtr, 729 1.3 oster RF_RaidAddr_t raidSector, 730 1.3 oster RF_RowCol_t * col, 731 1.3 oster RF_SectorNum_t * diskSector, 732 1.27 christos int remap) 733 1.3 oster { 734 1.23 perry RF_StripeNum_t SUID = raidSector / 735 1.8 oster raidPtr->Layout.sectorsPerStripeUnit; 736 1.3 oster 737 1.3 oster /* *col = 738 1.3 oster * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt 739 1.3 oster * r->numCol - raidPtr->Layout.numParityLogCol); */ 740 1.3 oster *col = raidPtr->Layout.numDataCol; 741 1.23 perry *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * 742 1.8 oster raidPtr->Layout.sectorsPerStripeUnit + 743 1.8 oster (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 744 1.1 oster } 745 1.1 oster 746 1.1 oster 747 1.1 oster /* given a regionID and sector offset, determine the physical disk address of the parity log */ 748 1.23 perry void 749 1.3 oster rf_MapLogParityLogging( 750 1.3 oster RF_Raid_t * raidPtr, 751 1.3 oster RF_RegionId_t regionID, 752 1.3 oster RF_SectorNum_t regionOffset, 753 1.3 oster RF_RowCol_t * col, 754 1.3 oster RF_SectorNum_t * startSector) 755 1.3 oster { 756 1.3 oster *col = raidPtr->numCol - 1; 757 1.3 oster *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; 758 1.1 oster } 759 1.1 oster 760 1.1 oster 761 1.8 oster /* given a regionID, determine the physical disk address of the logged 762 1.8 oster parity for that region */ 763 1.23 perry void 764 1.3 oster rf_MapRegionParity( 765 1.3 oster RF_Raid_t * raidPtr, 766 1.3 oster RF_RegionId_t regionID, 767 1.3 oster RF_RowCol_t * col, 768 1.3 oster RF_SectorNum_t * startSector, 769 1.3 oster RF_SectorCount_t * numSector) 770 1.3 oster { 771 1.3 oster *col = raidPtr->numCol - 2; 772 1.3 oster *startSector = raidPtr->regionInfo[regionID].parityStartAddr; 773 1.3 oster *numSector = raidPtr->regionInfo[regionID].numSectorsParity; 774 1.1 oster } 775 1.1 oster 776 1.1 oster 777 1.8 oster /* given a logical RAID address, determine the participating disks in 778 1.8 oster the stripe */ 779 1.23 perry void 780 1.3 oster rf_IdentifyStripeParityLogging( 781 1.3 oster RF_Raid_t * raidPtr, 782 1.3 oster RF_RaidAddr_t addr, 783 1.16 oster RF_RowCol_t ** diskids) 784 1.3 oster { 785 1.23 perry RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, 786 1.8 oster addr); 787 1.23 perry RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) 788 1.8 oster raidPtr->Layout.layoutSpecificInfo; 789 1.3 oster *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; 790 1.1 oster } 791 1.1 oster 792 1.1 oster 793 1.23 perry void 794 1.3 oster rf_MapSIDToPSIDParityLogging( 795 1.27 christos RF_RaidLayout_t * layoutPtr, 796 1.3 oster RF_StripeNum_t stripeID, 797 1.3 oster RF_StripeNum_t * psID, 798 1.3 oster RF_ReconUnitNum_t * which_ru) 799 1.1 oster { 800 1.3 oster *which_ru = 0; 801 1.3 oster *psID = stripeID; 802 1.1 oster } 803 1.1 oster 804 1.1 oster 805 1.1 oster /* select an algorithm for performing an access. Returns two pointers, 806 1.1 oster * one to a function that will return information about the DAG, and 807 1.1 oster * another to a function that will create the dag. 808 1.1 oster */ 809 1.23 perry void 810 1.3 oster rf_ParityLoggingDagSelect( 811 1.3 oster RF_Raid_t * raidPtr, 812 1.3 oster RF_IoType_t type, 813 1.3 oster RF_AccessStripeMap_t * asmp, 814 1.3 oster RF_VoidFuncPtr * createFunc) 815 1.3 oster { 816 1.3 oster RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 817 1.3 oster RF_PhysDiskAddr_t *failedPDA = NULL; 818 1.16 oster RF_RowCol_t fcol; 819 1.3 oster RF_RowStatus_t rstat; 820 1.3 oster int prior_recon; 821 1.3 oster 822 1.3 oster RF_ASSERT(RF_IO_IS_R_OR_W(type)); 823 1.3 oster 824 1.3 oster if (asmp->numDataFailed + asmp->numParityFailed > 1) { 825 1.3 oster RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); 826 1.15 oster *createFunc = NULL; 827 1.3 oster return; 828 1.3 oster } else 829 1.3 oster if (asmp->numDataFailed + asmp->numParityFailed == 1) { 830 1.3 oster 831 1.3 oster /* if under recon & already reconstructed, redirect 832 1.3 oster * the access to the spare drive and eliminate the 833 1.3 oster * failure indication */ 834 1.3 oster failedPDA = asmp->failedPDAs[0]; 835 1.3 oster fcol = failedPDA->col; 836 1.16 oster rstat = raidPtr->status; 837 1.3 oster prior_recon = (rstat == rf_rs_reconfigured) || ( 838 1.3 oster (rstat == rf_rs_reconstructing) ? 839 1.16 oster rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, failedPDA->startSector) : 0 840 1.3 oster ); 841 1.3 oster if (prior_recon) { 842 1.16 oster RF_RowCol_t oc = failedPDA->col; 843 1.3 oster RF_SectorNum_t oo = failedPDA->startSector; 844 1.23 perry if (layoutPtr->map->flags & 845 1.23 perry RF_DISTRIBUTE_SPARE) { 846 1.8 oster /* redirect to dist spare space */ 847 1.3 oster 848 1.3 oster if (failedPDA == asmp->parityInfo) { 849 1.3 oster 850 1.3 oster /* parity has failed */ 851 1.16 oster (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, 852 1.3 oster &failedPDA->col, &failedPDA->startSector, RF_REMAP); 853 1.3 oster 854 1.3 oster if (asmp->parityInfo->next) { /* redir 2nd component, 855 1.3 oster * if any */ 856 1.3 oster RF_PhysDiskAddr_t *p = asmp->parityInfo->next; 857 1.3 oster RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; 858 1.3 oster p->col = failedPDA->col; 859 1.3 oster p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + 860 1.3 oster SUoffs; /* cheating: 861 1.3 oster * startSector is not 862 1.3 oster * really a RAID address */ 863 1.3 oster } 864 1.3 oster } else 865 1.3 oster if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { 866 1.3 oster RF_ASSERT(0); /* should not ever 867 1.3 oster * happen */ 868 1.3 oster } else { 869 1.3 oster 870 1.3 oster /* data has failed */ 871 1.16 oster (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, 872 1.3 oster &failedPDA->col, &failedPDA->startSector, RF_REMAP); 873 1.3 oster 874 1.3 oster } 875 1.3 oster 876 1.23 perry } else { 877 1.8 oster /* redirect to dedicated spare space */ 878 1.3 oster 879 1.16 oster failedPDA->col = raidPtr->Disks[fcol].spareCol; 880 1.3 oster 881 1.3 oster /* the parity may have two distinct 882 1.3 oster * components, both of which may need 883 1.3 oster * to be redirected */ 884 1.3 oster if (asmp->parityInfo->next) { 885 1.3 oster if (failedPDA == asmp->parityInfo) { 886 1.3 oster failedPDA->next->col = failedPDA->col; 887 1.3 oster } else 888 1.8 oster if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */ 889 1.3 oster asmp->parityInfo->col = failedPDA->col; 890 1.3 oster } 891 1.3 oster } 892 1.3 oster } 893 1.3 oster 894 1.3 oster RF_ASSERT(failedPDA->col != -1); 895 1.3 oster 896 1.3 oster if (rf_dagDebug || rf_mapDebug) { 897 1.16 oster printf("raid%d: Redirected type '%c' c %d o %ld -> c %d o %ld\n", 898 1.16 oster raidPtr->raidid, type, oc, (long) oo, failedPDA->col, (long) failedPDA->startSector); 899 1.3 oster } 900 1.3 oster asmp->numDataFailed = asmp->numParityFailed = 0; 901 1.3 oster } 902 1.3 oster } 903 1.3 oster if (type == RF_IO_TYPE_READ) { 904 1.3 oster 905 1.3 oster if (asmp->numDataFailed == 0) 906 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; 907 1.3 oster else 908 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; 909 1.3 oster 910 1.3 oster } else { 911 1.3 oster 912 1.3 oster 913 1.3 oster /* if mirroring, always use large writes. If the access 914 1.3 oster * requires two distinct parity updates, always do a small 915 1.3 oster * write. If the stripe contains a failure but the access 916 1.3 oster * does not, do a small write. The first conditional 917 1.3 oster * (numStripeUnitsAccessed <= numDataCol/2) uses a 918 1.3 oster * less-than-or-equal rather than just a less-than because 919 1.3 oster * when G is 3 or 4, numDataCol/2 is 1, and I want 920 1.3 oster * single-stripe-unit updates to use just one disk. */ 921 1.3 oster if ((asmp->numDataFailed + asmp->numParityFailed) == 0) { 922 1.23 perry if (((asmp->numStripeUnitsAccessed <= 923 1.23 perry (layoutPtr->numDataCol / 2)) && 924 1.8 oster (layoutPtr->numDataCol != 1)) || 925 1.23 perry (asmp->parityInfo->next != NULL) || 926 1.8 oster rf_CheckStripeForFailures(raidPtr, asmp)) { 927 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG; 928 1.3 oster } else 929 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG; 930 1.3 oster } else 931 1.3 oster if (asmp->numParityFailed == 1) 932 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; 933 1.3 oster else 934 1.3 oster if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) 935 1.3 oster *createFunc = NULL; 936 1.3 oster else 937 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; 938 1.3 oster } 939 1.1 oster } 940 1.3 oster #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 941