1 1.31 oster /* $NetBSD: rf_paritylogDiskMgr.c,v 1.31 2021/07/23 00:54:45 oster Exp $ */ 2 1.1 oster /* 3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University. 4 1.1 oster * All rights reserved. 5 1.1 oster * 6 1.1 oster * Author: William V. Courtright II 7 1.1 oster * 8 1.1 oster * Permission to use, copy, modify and distribute this software and 9 1.1 oster * its documentation is hereby granted, provided that both the copyright 10 1.1 oster * notice and this permission notice appear in all copies of the 11 1.1 oster * software, derivative works or modified versions, and any portions 12 1.1 oster * thereof, and that both notices appear in supporting documentation. 13 1.1 oster * 14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 1.1 oster * 18 1.1 oster * Carnegie Mellon requests users of this software to return to 19 1.1 oster * 20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 21 1.1 oster * School of Computer Science 22 1.1 oster * Carnegie Mellon University 23 1.1 oster * Pittsburgh PA 15213-3890 24 1.1 oster * 25 1.1 oster * any improvements or extensions that they make and grant Carnegie the 26 1.1 oster * rights to redistribute these changes. 27 1.1 oster */ 28 1.1 oster /* Code for flushing and reintegration operations related to parity logging. 29 1.1 oster * 30 1.1 oster */ 31 1.13 lukem 32 1.13 lukem #include <sys/cdefs.h> 33 1.31 oster __KERNEL_RCSID(0, "$NetBSD: rf_paritylogDiskMgr.c,v 1.31 2021/07/23 00:54:45 oster Exp $"); 34 1.1 oster 35 1.1 oster #include "rf_archs.h" 36 1.1 oster 37 1.1 oster #if RF_INCLUDE_PARITYLOGGING > 0 38 1.1 oster 39 1.12 oster #include <dev/raidframe/raidframevar.h> 40 1.12 oster 41 1.1 oster #include "rf_threadstuff.h" 42 1.1 oster #include "rf_mcpair.h" 43 1.1 oster #include "rf_raid.h" 44 1.1 oster #include "rf_dag.h" 45 1.1 oster #include "rf_dagfuncs.h" 46 1.1 oster #include "rf_desc.h" 47 1.1 oster #include "rf_layout.h" 48 1.1 oster #include "rf_diskqueue.h" 49 1.1 oster #include "rf_paritylog.h" 50 1.1 oster #include "rf_general.h" 51 1.1 oster #include "rf_etimer.h" 52 1.1 oster #include "rf_paritylogging.h" 53 1.1 oster #include "rf_engine.h" 54 1.1 oster #include "rf_dagutils.h" 55 1.1 oster #include "rf_map.h" 56 1.1 oster #include "rf_parityscan.h" 57 1.1 oster 58 1.1 oster #include "rf_paritylogDiskMgr.h" 59 1.1 oster 60 1.22 christos static void *AcquireReintBuffer(RF_RegionBufferQueue_t *); 61 1.1 oster 62 1.22 christos static void * 63 1.23 dsl AcquireReintBuffer(RF_RegionBufferQueue_t *pool) 64 1.3 oster { 65 1.22 christos void *bufPtr = NULL; 66 1.3 oster 67 1.3 oster /* Return a region buffer from the free list (pool). If the free list 68 1.3 oster * is empty, WAIT. BLOCKING */ 69 1.3 oster 70 1.28 mrg rf_lock_mutex2(pool->mutex); 71 1.3 oster if (pool->availableBuffers > 0) { 72 1.3 oster bufPtr = pool->buffers[pool->availBuffersIndex]; 73 1.3 oster pool->availableBuffers--; 74 1.3 oster pool->availBuffersIndex++; 75 1.3 oster if (pool->availBuffersIndex == pool->totalBuffers) 76 1.3 oster pool->availBuffersIndex = 0; 77 1.28 mrg rf_unlock_mutex2(pool->mutex); 78 1.3 oster } else { 79 1.10 oster RF_PANIC(); /* should never happen in correct config, 80 1.3 oster * single reint */ 81 1.28 mrg rf_wait_cond2(pool->cond, pool->mutex); 82 1.3 oster } 83 1.3 oster return (bufPtr); 84 1.3 oster } 85 1.3 oster 86 1.17 perry static void 87 1.3 oster ReleaseReintBuffer( 88 1.3 oster RF_RegionBufferQueue_t * pool, 89 1.22 christos void *bufPtr) 90 1.3 oster { 91 1.3 oster /* Insert a region buffer (bufPtr) into the free list (pool). 92 1.3 oster * NON-BLOCKING */ 93 1.3 oster 94 1.28 mrg rf_lock_mutex2(pool->mutex); 95 1.3 oster pool->availableBuffers++; 96 1.3 oster pool->buffers[pool->emptyBuffersIndex] = bufPtr; 97 1.3 oster pool->emptyBuffersIndex++; 98 1.3 oster if (pool->emptyBuffersIndex == pool->totalBuffers) 99 1.3 oster pool->emptyBuffersIndex = 0; 100 1.3 oster RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); 101 1.28 mrg /* 102 1.28 mrg * XXXmrg this signal goes with the above "shouldn't happen" wait? 103 1.28 mrg */ 104 1.28 mrg rf_signal_cond2(pool->cond); 105 1.28 mrg rf_unlock_mutex2(pool->mutex); 106 1.3 oster } 107 1.3 oster 108 1.3 oster 109 1.1 oster 110 1.17 perry static void 111 1.3 oster ReadRegionLog( 112 1.3 oster RF_RegionId_t regionID, 113 1.3 oster RF_MCPair_t * rrd_mcpair, 114 1.22 christos void *regionBuffer, 115 1.3 oster RF_Raid_t * raidPtr, 116 1.3 oster RF_DagHeader_t ** rrd_dag_h, 117 1.3 oster RF_AllocListElem_t ** rrd_alloclist, 118 1.3 oster RF_PhysDiskAddr_t ** rrd_pda) 119 1.3 oster { 120 1.3 oster /* Initiate the read a region log from disk. Once initiated, return 121 1.3 oster * to the calling routine. 122 1.17 perry * 123 1.3 oster * NON-BLOCKING */ 124 1.3 oster 125 1.8 oster RF_AccTraceEntry_t *tracerec; 126 1.3 oster RF_DagNode_t *rrd_rdNode; 127 1.3 oster 128 1.3 oster /* create DAG to read region log from disk */ 129 1.3 oster rf_MakeAllocList(*rrd_alloclist); 130 1.17 perry *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, 131 1.9 oster rf_DiskReadFunc, rf_DiskReadUndoFunc, 132 1.17 perry "Rrl", *rrd_alloclist, 133 1.17 perry RF_DAG_FLAGS_NONE, 134 1.9 oster RF_IO_NORMAL_PRIORITY); 135 1.3 oster 136 1.3 oster /* create and initialize PDA for the core log */ 137 1.31 oster *rrd_pda = rf_AllocPDAList(raidPtr, 1); 138 1.15 oster rf_MapLogParityLogging(raidPtr, regionID, 0, 139 1.9 oster &((*rrd_pda)->col), &((*rrd_pda)->startSector)); 140 1.3 oster (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; 141 1.3 oster 142 1.3 oster if ((*rrd_pda)->next) { 143 1.3 oster (*rrd_pda)->next = NULL; 144 1.3 oster printf("set rrd_pda->next to NULL\n"); 145 1.3 oster } 146 1.3 oster /* initialize DAG parameters */ 147 1.29 christos tracerec = RF_Malloc(sizeof(*tracerec)); 148 1.8 oster (*rrd_dag_h)->tracerec = tracerec; 149 1.3 oster rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; 150 1.3 oster rrd_rdNode->params[0].p = *rrd_pda; 151 1.1 oster /* rrd_rdNode->params[1] = regionBuffer; */ 152 1.3 oster rrd_rdNode->params[2].v = 0; 153 1.16 oster rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 154 1.1 oster 155 1.3 oster /* launch region log read dag */ 156 1.3 oster rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 157 1.3 oster (void *) rrd_mcpair); 158 1.1 oster } 159 1.1 oster 160 1.1 oster 161 1.1 oster 162 1.17 perry static void 163 1.3 oster WriteCoreLog( 164 1.3 oster RF_ParityLog_t * log, 165 1.3 oster RF_MCPair_t * fwr_mcpair, 166 1.3 oster RF_Raid_t * raidPtr, 167 1.3 oster RF_DagHeader_t ** fwr_dag_h, 168 1.3 oster RF_AllocListElem_t ** fwr_alloclist, 169 1.3 oster RF_PhysDiskAddr_t ** fwr_pda) 170 1.3 oster { 171 1.3 oster RF_RegionId_t regionID = log->regionID; 172 1.8 oster RF_AccTraceEntry_t *tracerec; 173 1.3 oster RF_SectorNum_t regionOffset; 174 1.3 oster RF_DagNode_t *fwr_wrNode; 175 1.3 oster 176 1.3 oster /* Initiate the write of a core log to a region log disk. Once 177 1.3 oster * initiated, return to the calling routine. 178 1.17 perry * 179 1.3 oster * NON-BLOCKING */ 180 1.3 oster 181 1.3 oster /* create DAG to write a core log to a region log disk */ 182 1.3 oster rf_MakeAllocList(*fwr_alloclist); 183 1.17 perry *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, 184 1.9 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 185 1.3 oster "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); 186 1.3 oster 187 1.31 oster *fwr_pda = rf_AllocPDAList(raidPtr, 1); 188 1.3 oster regionOffset = log->diskOffset; 189 1.17 perry rf_MapLogParityLogging(raidPtr, regionID, regionOffset, 190 1.17 perry &((*fwr_pda)->col), 191 1.9 oster &((*fwr_pda)->startSector)); 192 1.3 oster (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; 193 1.3 oster 194 1.3 oster /* initialize DAG parameters */ 195 1.29 christos tracerec = RF_Malloc(sizeof(*tracerec)); 196 1.8 oster (*fwr_dag_h)->tracerec = tracerec; 197 1.3 oster fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; 198 1.3 oster fwr_wrNode->params[0].p = *fwr_pda; 199 1.1 oster /* fwr_wrNode->params[1] = log->bufPtr; */ 200 1.3 oster fwr_wrNode->params[2].v = 0; 201 1.16 oster fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 202 1.3 oster 203 1.3 oster /* launch the dag to write the core log to disk */ 204 1.3 oster rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 205 1.3 oster (void *) fwr_mcpair); 206 1.3 oster } 207 1.3 oster 208 1.3 oster 209 1.17 perry static void 210 1.3 oster ReadRegionParity( 211 1.3 oster RF_RegionId_t regionID, 212 1.3 oster RF_MCPair_t * prd_mcpair, 213 1.22 christos void *parityBuffer, 214 1.3 oster RF_Raid_t * raidPtr, 215 1.3 oster RF_DagHeader_t ** prd_dag_h, 216 1.3 oster RF_AllocListElem_t ** prd_alloclist, 217 1.3 oster RF_PhysDiskAddr_t ** prd_pda) 218 1.3 oster { 219 1.3 oster /* Initiate the read region parity from disk. Once initiated, return 220 1.3 oster * to the calling routine. 221 1.17 perry * 222 1.3 oster * NON-BLOCKING */ 223 1.3 oster 224 1.8 oster RF_AccTraceEntry_t *tracerec; 225 1.3 oster RF_DagNode_t *prd_rdNode; 226 1.3 oster 227 1.3 oster /* create DAG to read region parity from disk */ 228 1.3 oster rf_MakeAllocList(*prd_alloclist); 229 1.17 perry *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, 230 1.17 perry rf_DiskReadUndoFunc, "Rrp", 231 1.17 perry *prd_alloclist, RF_DAG_FLAGS_NONE, 232 1.9 oster RF_IO_NORMAL_PRIORITY); 233 1.3 oster 234 1.3 oster /* create and initialize PDA for region parity */ 235 1.31 oster *prd_pda = rf_AllocPDAList(raidPtr, 1); 236 1.15 oster rf_MapRegionParity(raidPtr, regionID, 237 1.17 perry &((*prd_pda)->col), &((*prd_pda)->startSector), 238 1.9 oster &((*prd_pda)->numSector)); 239 1.3 oster if (rf_parityLogDebug) 240 1.3 oster printf("[reading %d sectors of parity from region %d]\n", 241 1.3 oster (int) (*prd_pda)->numSector, regionID); 242 1.3 oster if ((*prd_pda)->next) { 243 1.3 oster (*prd_pda)->next = NULL; 244 1.3 oster printf("set prd_pda->next to NULL\n"); 245 1.3 oster } 246 1.3 oster /* initialize DAG parameters */ 247 1.29 christos tracerec = RF_Malloc(sizeof(*tracerec)); 248 1.8 oster (*prd_dag_h)->tracerec = tracerec; 249 1.3 oster prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; 250 1.3 oster prd_rdNode->params[0].p = *prd_pda; 251 1.3 oster prd_rdNode->params[1].p = parityBuffer; 252 1.3 oster prd_rdNode->params[2].v = 0; 253 1.16 oster prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 254 1.14 oster #if RF_DEBUG_VALIDATE_DAG 255 1.3 oster if (rf_validateDAGDebug) 256 1.3 oster rf_ValidateDAG(*prd_dag_h); 257 1.14 oster #endif 258 1.3 oster /* launch region parity read dag */ 259 1.3 oster rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 260 1.3 oster (void *) prd_mcpair); 261 1.3 oster } 262 1.3 oster 263 1.17 perry static void 264 1.3 oster WriteRegionParity( 265 1.3 oster RF_RegionId_t regionID, 266 1.3 oster RF_MCPair_t * pwr_mcpair, 267 1.22 christos void *parityBuffer, 268 1.3 oster RF_Raid_t * raidPtr, 269 1.3 oster RF_DagHeader_t ** pwr_dag_h, 270 1.3 oster RF_AllocListElem_t ** pwr_alloclist, 271 1.3 oster RF_PhysDiskAddr_t ** pwr_pda) 272 1.3 oster { 273 1.3 oster /* Initiate the write of region parity to disk. Once initiated, return 274 1.3 oster * to the calling routine. 275 1.17 perry * 276 1.3 oster * NON-BLOCKING */ 277 1.3 oster 278 1.8 oster RF_AccTraceEntry_t *tracerec; 279 1.3 oster RF_DagNode_t *pwr_wrNode; 280 1.3 oster 281 1.3 oster /* create DAG to write region log from disk */ 282 1.3 oster rf_MakeAllocList(*pwr_alloclist); 283 1.17 perry *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, 284 1.9 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 285 1.17 perry "Wrp", *pwr_alloclist, 286 1.17 perry RF_DAG_FLAGS_NONE, 287 1.9 oster RF_IO_NORMAL_PRIORITY); 288 1.3 oster 289 1.3 oster /* create and initialize PDA for region parity */ 290 1.31 oster *pwr_pda = rf_AllocPDAList(raidPtr, 1); 291 1.15 oster rf_MapRegionParity(raidPtr, regionID, 292 1.17 perry &((*pwr_pda)->col), &((*pwr_pda)->startSector), 293 1.9 oster &((*pwr_pda)->numSector)); 294 1.3 oster 295 1.3 oster /* initialize DAG parameters */ 296 1.29 christos tracerec = RF_Malloc(sizeof(*tracerec)); 297 1.8 oster (*pwr_dag_h)->tracerec = tracerec; 298 1.3 oster pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; 299 1.3 oster pwr_wrNode->params[0].p = *pwr_pda; 300 1.1 oster /* pwr_wrNode->params[1] = parityBuffer; */ 301 1.3 oster pwr_wrNode->params[2].v = 0; 302 1.16 oster pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 303 1.1 oster 304 1.3 oster /* launch the dag to write region parity to disk */ 305 1.3 oster rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 306 1.3 oster (void *) pwr_mcpair); 307 1.3 oster } 308 1.3 oster 309 1.17 perry static void 310 1.3 oster FlushLogsToDisk( 311 1.3 oster RF_Raid_t * raidPtr, 312 1.3 oster RF_ParityLog_t * logList) 313 1.3 oster { 314 1.3 oster /* Flush a linked list of core logs to the log disk. Logs contain the 315 1.3 oster * disk location where they should be written. Logs were written in 316 1.3 oster * FIFO order and that order must be preserved. 317 1.17 perry * 318 1.3 oster * Recommended optimizations: 1) allow multiple flushes to occur 319 1.3 oster * simultaneously 2) coalesce contiguous flush operations 320 1.17 perry * 321 1.3 oster * BLOCKING */ 322 1.3 oster 323 1.3 oster RF_ParityLog_t *log; 324 1.3 oster RF_RegionId_t regionID; 325 1.3 oster RF_MCPair_t *fwr_mcpair; 326 1.3 oster RF_DagHeader_t *fwr_dag_h; 327 1.3 oster RF_AllocListElem_t *fwr_alloclist; 328 1.3 oster RF_PhysDiskAddr_t *fwr_pda; 329 1.3 oster 330 1.31 oster fwr_mcpair = rf_AllocMCPair(raidPtr); 331 1.24 mrg RF_LOCK_MCPAIR(fwr_mcpair); 332 1.3 oster 333 1.3 oster RF_ASSERT(logList); 334 1.3 oster log = logList; 335 1.3 oster while (log) { 336 1.3 oster regionID = log->regionID; 337 1.3 oster 338 1.3 oster /* create and launch a DAG to write the core log */ 339 1.3 oster if (rf_parityLogDebug) 340 1.3 oster printf("[initiating write of core log for region %d]\n", regionID); 341 1.3 oster fwr_mcpair->flag = RF_FALSE; 342 1.17 perry WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, 343 1.9 oster &fwr_alloclist, &fwr_pda); 344 1.3 oster 345 1.3 oster /* wait for the DAG to complete */ 346 1.3 oster while (!fwr_mcpair->flag) 347 1.24 mrg RF_WAIT_MCPAIR(fwr_mcpair); 348 1.3 oster if (fwr_dag_h->status != rf_enable) { 349 1.3 oster RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); 350 1.3 oster RF_ASSERT(0); 351 1.3 oster } 352 1.3 oster /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ 353 1.31 oster rf_FreePhysDiskAddr(raidPtr, fwr_pda); 354 1.3 oster rf_FreeDAG(fwr_dag_h); 355 1.3 oster rf_FreeAllocList(fwr_alloclist); 356 1.3 oster 357 1.3 oster log = log->next; 358 1.3 oster } 359 1.24 mrg RF_UNLOCK_MCPAIR(fwr_mcpair); 360 1.31 oster rf_FreeMCPair(raidPtr, fwr_mcpair); 361 1.3 oster rf_ReleaseParityLogs(raidPtr, logList); 362 1.3 oster } 363 1.3 oster 364 1.17 perry static void 365 1.3 oster ReintegrateRegion( 366 1.3 oster RF_Raid_t * raidPtr, 367 1.3 oster RF_RegionId_t regionID, 368 1.21 christos RF_ParityLog_t * coreLog) 369 1.3 oster { 370 1.3 oster RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; 371 1.19 christos RF_DagHeader_t *rrd_dag_h = NULL, *prd_dag_h, *pwr_dag_h; 372 1.19 christos RF_AllocListElem_t *rrd_alloclist = NULL, *prd_alloclist, *pwr_alloclist; 373 1.19 christos RF_PhysDiskAddr_t *rrd_pda = NULL, *prd_pda, *pwr_pda; 374 1.22 christos void *parityBuffer, *regionBuffer = NULL; 375 1.3 oster 376 1.17 perry /* Reintegrate a region (regionID). 377 1.10 oster * 378 1.17 perry * 1. acquire region and parity buffers 379 1.17 perry * 2. read log from disk 380 1.17 perry * 3. read parity from disk 381 1.17 perry * 4. apply log to parity 382 1.17 perry * 5. apply core log to parity 383 1.10 oster * 6. write new parity to disk 384 1.17 perry * 385 1.3 oster * BLOCKING */ 386 1.3 oster 387 1.3 oster if (rf_parityLogDebug) 388 1.3 oster printf("[reintegrating region %d]\n", regionID); 389 1.3 oster 390 1.3 oster /* initiate read of region parity */ 391 1.3 oster if (rf_parityLogDebug) 392 1.9 oster printf("[initiating read of parity for region %d]\n",regionID); 393 1.3 oster parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); 394 1.31 oster prd_mcpair = rf_AllocMCPair(raidPtr); 395 1.24 mrg RF_LOCK_MCPAIR(prd_mcpair); 396 1.3 oster prd_mcpair->flag = RF_FALSE; 397 1.17 perry ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, 398 1.9 oster &prd_dag_h, &prd_alloclist, &prd_pda); 399 1.3 oster 400 1.3 oster /* if region log nonempty, initiate read */ 401 1.3 oster if (raidPtr->regionInfo[regionID].diskCount > 0) { 402 1.3 oster if (rf_parityLogDebug) 403 1.9 oster printf("[initiating read of disk log for region %d]\n", 404 1.9 oster regionID); 405 1.3 oster regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); 406 1.31 oster rrd_mcpair = rf_AllocMCPair(raidPtr); 407 1.24 mrg RF_LOCK_MCPAIR(rrd_mcpair); 408 1.3 oster rrd_mcpair->flag = RF_FALSE; 409 1.17 perry ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, 410 1.9 oster &rrd_dag_h, &rrd_alloclist, &rrd_pda); 411 1.3 oster } 412 1.3 oster /* wait on read of region parity to complete */ 413 1.3 oster while (!prd_mcpair->flag) { 414 1.24 mrg RF_WAIT_MCPAIR(prd_mcpair); 415 1.3 oster } 416 1.24 mrg RF_UNLOCK_MCPAIR(prd_mcpair); 417 1.3 oster if (prd_dag_h->status != rf_enable) { 418 1.3 oster RF_ERRORMSG("Unable to read parity from disk\n"); 419 1.3 oster /* add code to fail the parity disk */ 420 1.3 oster RF_ASSERT(0); 421 1.3 oster } 422 1.3 oster /* apply core log to parity */ 423 1.3 oster /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ 424 1.3 oster 425 1.3 oster if (raidPtr->regionInfo[regionID].diskCount > 0) { 426 1.3 oster /* wait on read of region log to complete */ 427 1.3 oster while (!rrd_mcpair->flag) 428 1.24 mrg RF_WAIT_MCPAIR(rrd_mcpair); 429 1.24 mrg RF_UNLOCK_MCPAIR(rrd_mcpair); 430 1.3 oster if (rrd_dag_h->status != rf_enable) { 431 1.3 oster RF_ERRORMSG("Unable to read region log from disk\n"); 432 1.3 oster /* add code to fail the log disk */ 433 1.3 oster RF_ASSERT(0); 434 1.3 oster } 435 1.3 oster /* apply region log to parity */ 436 1.3 oster /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ 437 1.3 oster /* release resources associated with region log */ 438 1.3 oster /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ 439 1.31 oster rf_FreePhysDiskAddr(raidPtr, rrd_pda); 440 1.3 oster rf_FreeDAG(rrd_dag_h); 441 1.3 oster rf_FreeAllocList(rrd_alloclist); 442 1.31 oster rf_FreeMCPair(raidPtr, rrd_mcpair); 443 1.3 oster ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); 444 1.3 oster } 445 1.3 oster /* write reintegrated parity to disk */ 446 1.3 oster if (rf_parityLogDebug) 447 1.9 oster printf("[initiating write of parity for region %d]\n", 448 1.9 oster regionID); 449 1.31 oster pwr_mcpair = rf_AllocMCPair(raidPtr); 450 1.24 mrg RF_LOCK_MCPAIR(pwr_mcpair); 451 1.3 oster pwr_mcpair->flag = RF_FALSE; 452 1.17 perry WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, 453 1.9 oster &pwr_dag_h, &pwr_alloclist, &pwr_pda); 454 1.3 oster while (!pwr_mcpair->flag) 455 1.24 mrg RF_WAIT_MCPAIR(pwr_mcpair); 456 1.24 mrg RF_UNLOCK_MCPAIR(pwr_mcpair); 457 1.3 oster if (pwr_dag_h->status != rf_enable) { 458 1.3 oster RF_ERRORMSG("Unable to write parity to disk\n"); 459 1.3 oster /* add code to fail the parity disk */ 460 1.3 oster RF_ASSERT(0); 461 1.3 oster } 462 1.3 oster /* release resources associated with read of old parity */ 463 1.3 oster /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ 464 1.31 oster rf_FreePhysDiskAddr(raidPtr, prd_pda); 465 1.3 oster rf_FreeDAG(prd_dag_h); 466 1.3 oster rf_FreeAllocList(prd_alloclist); 467 1.31 oster rf_FreeMCPair(raidPtr, prd_mcpair); 468 1.3 oster 469 1.3 oster /* release resources associated with write of new parity */ 470 1.3 oster ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); 471 1.3 oster /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ 472 1.31 oster rf_FreePhysDiskAddr(raidPtr, pwr_pda); 473 1.3 oster rf_FreeDAG(pwr_dag_h); 474 1.3 oster rf_FreeAllocList(pwr_alloclist); 475 1.31 oster rf_FreeMCPair(raidPtr, pwr_mcpair); 476 1.3 oster 477 1.3 oster if (rf_parityLogDebug) 478 1.3 oster printf("[finished reintegrating region %d]\n", regionID); 479 1.3 oster } 480 1.3 oster 481 1.3 oster 482 1.3 oster 483 1.17 perry static void 484 1.3 oster ReintegrateLogs( 485 1.3 oster RF_Raid_t * raidPtr, 486 1.3 oster RF_ParityLog_t * logList) 487 1.3 oster { 488 1.3 oster RF_ParityLog_t *log, *freeLogList = NULL; 489 1.3 oster RF_ParityLogData_t *logData, *logDataList; 490 1.3 oster RF_RegionId_t regionID; 491 1.3 oster 492 1.3 oster RF_ASSERT(logList); 493 1.3 oster while (logList) { 494 1.3 oster log = logList; 495 1.3 oster logList = logList->next; 496 1.3 oster log->next = NULL; 497 1.3 oster regionID = log->regionID; 498 1.3 oster ReintegrateRegion(raidPtr, regionID, log); 499 1.3 oster log->numRecords = 0; 500 1.3 oster 501 1.3 oster /* remove all items which are blocked on reintegration of this 502 1.3 oster * region */ 503 1.25 mrg rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 504 1.17 perry logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, 505 1.17 perry &raidPtr->parityLogDiskQueue.reintBlockHead, 506 1.17 perry &raidPtr->parityLogDiskQueue.reintBlockTail, 507 1.9 oster RF_TRUE); 508 1.3 oster logDataList = logData; 509 1.3 oster while (logData) { 510 1.9 oster logData->next = rf_SearchAndDequeueParityLogData( 511 1.17 perry raidPtr, regionID, 512 1.17 perry &raidPtr->parityLogDiskQueue.reintBlockHead, 513 1.17 perry &raidPtr->parityLogDiskQueue.reintBlockTail, 514 1.9 oster RF_TRUE); 515 1.3 oster logData = logData->next; 516 1.3 oster } 517 1.25 mrg rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 518 1.3 oster 519 1.3 oster /* process blocked log data and clear reintInProgress flag for 520 1.3 oster * this region */ 521 1.3 oster if (logDataList) 522 1.3 oster rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); 523 1.3 oster else { 524 1.3 oster /* Enable flushing for this region. Holding both 525 1.3 oster * locks provides a synchronization barrier with 526 1.3 oster * DumpParityLogToDisk */ 527 1.27 mrg rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); 528 1.26 mrg rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); 529 1.25 mrg /* XXXmrg: don't need this? */ 530 1.25 mrg rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 531 1.3 oster raidPtr->regionInfo[regionID].diskCount = 0; 532 1.3 oster raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 533 1.27 mrg rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); 534 1.26 mrg rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 535 1.3 oster * enabled */ 536 1.25 mrg /* XXXmrg: don't need this? */ 537 1.25 mrg rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 538 1.3 oster } 539 1.3 oster /* if log wasn't used, attach it to the list of logs to be 540 1.3 oster * returned */ 541 1.3 oster if (log) { 542 1.3 oster log->next = freeLogList; 543 1.3 oster freeLogList = log; 544 1.3 oster } 545 1.3 oster } 546 1.3 oster if (freeLogList) 547 1.3 oster rf_ReleaseParityLogs(raidPtr, freeLogList); 548 1.3 oster } 549 1.3 oster 550 1.17 perry int 551 1.3 oster rf_ShutdownLogging(RF_Raid_t * raidPtr) 552 1.3 oster { 553 1.3 oster /* shutdown parity logging 1) disable parity logging in all regions 2) 554 1.3 oster * reintegrate all regions */ 555 1.3 oster 556 1.3 oster RF_SectorCount_t diskCount; 557 1.3 oster RF_RegionId_t regionID; 558 1.3 oster RF_ParityLog_t *log; 559 1.3 oster 560 1.3 oster if (rf_parityLogDebug) 561 1.3 oster printf("[shutting down parity logging]\n"); 562 1.3 oster /* Since parity log maps are volatile, we must reintegrate all 563 1.3 oster * regions. */ 564 1.3 oster if (rf_forceParityLogReint) { 565 1.3 oster for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 566 1.27 mrg rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); 567 1.17 perry raidPtr->regionInfo[regionID].loggingEnabled = 568 1.9 oster RF_FALSE; 569 1.3 oster log = raidPtr->regionInfo[regionID].coreLog; 570 1.3 oster raidPtr->regionInfo[regionID].coreLog = NULL; 571 1.3 oster diskCount = raidPtr->regionInfo[regionID].diskCount; 572 1.27 mrg rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); 573 1.3 oster if (diskCount > 0 || log != NULL) 574 1.3 oster ReintegrateRegion(raidPtr, regionID, log); 575 1.3 oster if (log != NULL) 576 1.3 oster rf_ReleaseParityLogs(raidPtr, log); 577 1.3 oster } 578 1.3 oster } 579 1.3 oster if (rf_parityLogDebug) { 580 1.3 oster printf("[parity logging disabled]\n"); 581 1.3 oster printf("[should be done!]\n"); 582 1.3 oster } 583 1.3 oster return (0); 584 1.3 oster } 585 1.3 oster 586 1.30 christos void 587 1.30 christos rf_ParityLoggingDiskManager(void *v) 588 1.3 oster { 589 1.30 christos RF_Raid_t *raidPtr = v; 590 1.3 oster RF_ParityLog_t *reintQueue, *flushQueue; 591 1.3 oster int workNeeded, done = RF_FALSE; 592 1.8 oster int s; 593 1.3 oster 594 1.3 oster /* Main program for parity logging disk thread. This routine waits 595 1.3 oster * for work to appear in either the flush or reintegration queues and 596 1.3 oster * is responsible for flushing core logs to the log disk as well as 597 1.3 oster * reintegrating parity regions. 598 1.17 perry * 599 1.3 oster * BLOCKING */ 600 1.3 oster 601 1.8 oster s = splbio(); 602 1.8 oster 603 1.25 mrg rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 604 1.3 oster 605 1.3 oster /* 606 1.3 oster * Inform our creator that we're running. Don't bother doing the 607 1.3 oster * mutex lock/unlock dance- we locked above, and we'll unlock 608 1.3 oster * below with nothing to do, yet. 609 1.3 oster */ 610 1.3 oster raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; 611 1.25 mrg rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 612 1.3 oster 613 1.3 oster /* empty the work queues */ 614 1.3 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 615 1.3 oster raidPtr->parityLogDiskQueue.flushQueue = NULL; 616 1.3 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 617 1.3 oster raidPtr->parityLogDiskQueue.reintQueue = NULL; 618 1.3 oster workNeeded = (flushQueue || reintQueue); 619 1.3 oster 620 1.3 oster while (!done) { 621 1.3 oster while (workNeeded) { 622 1.3 oster /* First, flush all logs in the flush queue, freeing 623 1.3 oster * buffers Second, reintegrate all regions which are 624 1.3 oster * reported as full. Third, append queued log data 625 1.3 oster * until blocked. 626 1.17 perry * 627 1.3 oster * Note: Incoming appends (ParityLogAppend) can block on 628 1.3 oster * either 1. empty buffer pool 2. region under 629 1.3 oster * reintegration To preserve a global FIFO ordering of 630 1.3 oster * appends, buffers are not released to the world 631 1.3 oster * until those appends blocked on buffers are removed 632 1.3 oster * from the append queue. Similarly, regions which 633 1.3 oster * are reintegrated are not opened for general use 634 1.3 oster * until the append queue has been emptied. */ 635 1.3 oster 636 1.25 mrg rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 637 1.3 oster 638 1.3 oster /* empty flushQueue, using free'd log buffers to 639 1.3 oster * process bufTail */ 640 1.3 oster if (flushQueue) 641 1.8 oster FlushLogsToDisk(raidPtr, flushQueue); 642 1.3 oster 643 1.3 oster /* empty reintQueue, flushing from reintTail as we go */ 644 1.3 oster if (reintQueue) 645 1.3 oster ReintegrateLogs(raidPtr, reintQueue); 646 1.3 oster 647 1.25 mrg rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 648 1.3 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 649 1.3 oster raidPtr->parityLogDiskQueue.flushQueue = NULL; 650 1.3 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 651 1.3 oster raidPtr->parityLogDiskQueue.reintQueue = NULL; 652 1.3 oster workNeeded = (flushQueue || reintQueue); 653 1.3 oster } 654 1.3 oster /* no work is needed at this point */ 655 1.3 oster if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { 656 1.3 oster /* shutdown parity logging 1. disable parity logging 657 1.3 oster * in all regions 2. reintegrate all regions */ 658 1.3 oster done = RF_TRUE; /* thread disabled, no work needed */ 659 1.25 mrg rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 660 1.3 oster rf_ShutdownLogging(raidPtr); 661 1.3 oster } 662 1.3 oster if (!done) { 663 1.3 oster /* thread enabled, no work needed, so sleep */ 664 1.3 oster if (rf_parityLogDebug) 665 1.3 oster printf("[parity logging disk manager sleeping]\n"); 666 1.25 mrg rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, 667 1.25 mrg raidPtr->parityLogDiskQueue.mutex); 668 1.3 oster if (rf_parityLogDebug) 669 1.3 oster printf("[parity logging disk manager just woke up]\n"); 670 1.3 oster flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 671 1.3 oster raidPtr->parityLogDiskQueue.flushQueue = NULL; 672 1.3 oster reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 673 1.3 oster raidPtr->parityLogDiskQueue.reintQueue = NULL; 674 1.3 oster workNeeded = (flushQueue || reintQueue); 675 1.3 oster } 676 1.3 oster } 677 1.3 oster /* 678 1.3 oster * Announce that we're done. 679 1.3 oster */ 680 1.25 mrg rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 681 1.3 oster raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; 682 1.25 mrg rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 683 1.25 mrg rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 684 1.8 oster 685 1.8 oster splx(s); 686 1.7 oster 687 1.3 oster /* 688 1.3 oster * In the NetBSD kernel, the thread must exit; returning would 689 1.3 oster * cause the proc trampoline to attempt to return to userspace. 690 1.3 oster */ 691 1.3 oster kthread_exit(0); /* does not return */ 692 1.1 oster } 693 1.3 oster #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 694