1 1.39 oster /* $NetBSD: rf_raid1.c,v 1.39 2021/07/23 22:34:12 oster Exp $ */ 2 1.1 oster /* 3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University. 4 1.1 oster * All rights reserved. 5 1.1 oster * 6 1.1 oster * Author: William V. Courtright II 7 1.1 oster * 8 1.1 oster * Permission to use, copy, modify and distribute this software and 9 1.1 oster * its documentation is hereby granted, provided that both the copyright 10 1.1 oster * notice and this permission notice appear in all copies of the 11 1.1 oster * software, derivative works or modified versions, and any portions 12 1.1 oster * thereof, and that both notices appear in supporting documentation. 13 1.1 oster * 14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 1.1 oster * 18 1.1 oster * Carnegie Mellon requests users of this software to return to 19 1.1 oster * 20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 21 1.1 oster * School of Computer Science 22 1.1 oster * Carnegie Mellon University 23 1.1 oster * Pittsburgh PA 15213-3890 24 1.1 oster * 25 1.1 oster * any improvements or extensions that they make and grant Carnegie the 26 1.1 oster * rights to redistribute these changes. 27 1.1 oster */ 28 1.1 oster 29 1.1 oster /***************************************************************************** 30 1.1 oster * 31 1.1 oster * rf_raid1.c -- implements RAID Level 1 32 1.1 oster * 33 1.1 oster *****************************************************************************/ 34 1.8 lukem 35 1.8 lukem #include <sys/cdefs.h> 36 1.39 oster __KERNEL_RCSID(0, "$NetBSD: rf_raid1.c,v 1.39 2021/07/23 22:34:12 oster Exp $"); 37 1.1 oster 38 1.1 oster #include "rf_raid.h" 39 1.1 oster #include "rf_raid1.h" 40 1.1 oster #include "rf_dag.h" 41 1.1 oster #include "rf_dagffrd.h" 42 1.1 oster #include "rf_dagffwr.h" 43 1.1 oster #include "rf_dagdegrd.h" 44 1.1 oster #include "rf_dagutils.h" 45 1.1 oster #include "rf_dagfuncs.h" 46 1.1 oster #include "rf_diskqueue.h" 47 1.1 oster #include "rf_general.h" 48 1.1 oster #include "rf_utils.h" 49 1.1 oster #include "rf_parityscan.h" 50 1.1 oster #include "rf_mcpair.h" 51 1.1 oster #include "rf_layout.h" 52 1.1 oster #include "rf_map.h" 53 1.1 oster #include "rf_engine.h" 54 1.1 oster #include "rf_reconbuffer.h" 55 1.1 oster 56 1.1 oster typedef struct RF_Raid1ConfigInfo_s { 57 1.3 oster RF_RowCol_t **stripeIdentifier; 58 1.3 oster } RF_Raid1ConfigInfo_t; 59 1.1 oster /* start of day code specific to RAID level 1 */ 60 1.25 perry int 61 1.29 christos rf_ConfigureRAID1(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 62 1.29 christos RF_Config_t *cfgPtr) 63 1.1 oster { 64 1.3 oster RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 65 1.3 oster RF_Raid1ConfigInfo_t *info; 66 1.3 oster RF_RowCol_t i; 67 1.3 oster 68 1.39 oster /* Sanity check the number of columns... */ 69 1.39 oster if (raidPtr->numCol < 2 || raidPtr->numCol % 2 != 0) { 70 1.39 oster return (EINVAL); 71 1.39 oster } 72 1.39 oster 73 1.3 oster /* create a RAID level 1 configuration structure */ 74 1.36 christos info = RF_MallocAndAdd(sizeof(*info), raidPtr->cleanupList); 75 1.3 oster if (info == NULL) 76 1.3 oster return (ENOMEM); 77 1.3 oster layoutPtr->layoutSpecificInfo = (void *) info; 78 1.3 oster 79 1.3 oster /* ... and fill it in. */ 80 1.3 oster info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList); 81 1.3 oster if (info->stripeIdentifier == NULL) 82 1.3 oster return (ENOMEM); 83 1.3 oster for (i = 0; i < (raidPtr->numCol / 2); i++) { 84 1.3 oster info->stripeIdentifier[i][0] = (2 * i); 85 1.3 oster info->stripeIdentifier[i][1] = (2 * i) + 1; 86 1.3 oster } 87 1.3 oster 88 1.3 oster /* this implementation of RAID level 1 uses one row of numCol disks 89 1.3 oster * and allows multiple (numCol / 2) stripes per row. A stripe 90 1.3 oster * consists of a single data unit and a single parity (mirror) unit. 91 1.3 oster * stripe id = raidAddr / stripeUnitSize */ 92 1.3 oster raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit; 93 1.3 oster layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2); 94 1.3 oster layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit; 95 1.3 oster layoutPtr->numDataCol = 1; 96 1.3 oster layoutPtr->numParityCol = 1; 97 1.3 oster return (0); 98 1.1 oster } 99 1.1 oster 100 1.1 oster 101 1.1 oster /* returns the physical disk location of the primary copy in the mirror pair */ 102 1.25 perry void 103 1.15 oster rf_MapSectorRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, 104 1.28 christos RF_RowCol_t *col, RF_SectorNum_t *diskSector, 105 1.29 christos int remap) 106 1.1 oster { 107 1.3 oster RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 108 1.3 oster RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); 109 1.1 oster 110 1.3 oster *col = 2 * mirrorPair; 111 1.3 oster *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 112 1.1 oster } 113 1.1 oster 114 1.1 oster 115 1.1 oster /* Map Parity 116 1.1 oster * 117 1.1 oster * returns the physical disk location of the secondary copy in the mirror 118 1.1 oster * pair 119 1.1 oster */ 120 1.25 perry void 121 1.15 oster rf_MapParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, 122 1.28 christos RF_RowCol_t *col, RF_SectorNum_t *diskSector, 123 1.29 christos int remap) 124 1.1 oster { 125 1.3 oster RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 126 1.3 oster RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); 127 1.1 oster 128 1.3 oster *col = (2 * mirrorPair) + 1; 129 1.1 oster 130 1.3 oster *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 131 1.1 oster } 132 1.1 oster 133 1.1 oster 134 1.1 oster /* IdentifyStripeRAID1 135 1.1 oster * 136 1.1 oster * returns a list of disks for a given redundancy group 137 1.1 oster */ 138 1.25 perry void 139 1.15 oster rf_IdentifyStripeRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, 140 1.15 oster RF_RowCol_t **diskids) 141 1.1 oster { 142 1.3 oster RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); 143 1.3 oster RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; 144 1.3 oster RF_ASSERT(stripeID >= 0); 145 1.3 oster RF_ASSERT(addr >= 0); 146 1.3 oster *diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)]; 147 1.3 oster RF_ASSERT(*diskids); 148 1.1 oster } 149 1.1 oster 150 1.1 oster 151 1.1 oster /* MapSIDToPSIDRAID1 152 1.1 oster * 153 1.1 oster * maps a logical stripe to a stripe in the redundant array 154 1.1 oster */ 155 1.25 perry void 156 1.29 christos rf_MapSIDToPSIDRAID1(RF_RaidLayout_t *layoutPtr, 157 1.28 christos RF_StripeNum_t stripeID, 158 1.15 oster RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) 159 1.1 oster { 160 1.3 oster *which_ru = 0; 161 1.3 oster *psID = stripeID; 162 1.1 oster } 163 1.1 oster 164 1.1 oster 165 1.1 oster 166 1.1 oster /****************************************************************************** 167 1.1 oster * select a graph to perform a single-stripe access 168 1.1 oster * 169 1.1 oster * Parameters: raidPtr - description of the physical array 170 1.1 oster * type - type of operation (read or write) requested 171 1.1 oster * asmap - logical & physical addresses for this access 172 1.1 oster * createFunc - name of function to use to create the graph 173 1.1 oster *****************************************************************************/ 174 1.1 oster 175 1.25 perry void 176 1.15 oster rf_RAID1DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, 177 1.15 oster RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) 178 1.1 oster { 179 1.35 martin RF_RowCol_t fcol, oc __unused; 180 1.3 oster RF_PhysDiskAddr_t *failedPDA; 181 1.5 oster int prior_recon; 182 1.3 oster RF_RowStatus_t rstat; 183 1.35 martin RF_SectorNum_t oo __unused; 184 1.3 oster 185 1.3 oster 186 1.3 oster RF_ASSERT(RF_IO_IS_R_OR_W(type)); 187 1.3 oster 188 1.3 oster if (asmap->numDataFailed + asmap->numParityFailed > 1) { 189 1.23 oster #if RF_DEBUG_DAG 190 1.25 perry if (rf_dagDebug) 191 1.16 oster RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); 192 1.23 oster #endif 193 1.3 oster *createFunc = NULL; 194 1.3 oster return; 195 1.3 oster } 196 1.3 oster if (asmap->numDataFailed + asmap->numParityFailed) { 197 1.3 oster /* 198 1.3 oster * We've got a fault. Re-map to spare space, iff applicable. 199 1.3 oster * Shouldn't the arch-independent code do this for us? 200 1.3 oster * Anyway, it turns out if we don't do this here, then when 201 1.3 oster * we're reconstructing, writes go only to the surviving 202 1.3 oster * original disk, and aren't reflected on the reconstructed 203 1.3 oster * spare. Oops. --jimz 204 1.3 oster */ 205 1.3 oster failedPDA = asmap->failedPDAs[0]; 206 1.3 oster fcol = failedPDA->col; 207 1.14 oster rstat = raidPtr->status; 208 1.3 oster prior_recon = (rstat == rf_rs_reconfigured) || ( 209 1.3 oster (rstat == rf_rs_reconstructing) ? 210 1.14 oster rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, failedPDA->startSector) : 0 211 1.3 oster ); 212 1.3 oster if (prior_recon) { 213 1.3 oster oc = fcol; 214 1.3 oster oo = failedPDA->startSector; 215 1.3 oster /* 216 1.3 oster * If we did distributed sparing, we'd monkey with that here. 217 1.3 oster * But we don't, so we'll 218 1.3 oster */ 219 1.14 oster failedPDA->col = raidPtr->Disks[fcol].spareCol; 220 1.3 oster /* 221 1.3 oster * Redirect other components, iff necessary. This looks 222 1.3 oster * pretty suspicious to me, but it's what the raid5 223 1.3 oster * DAG select does. 224 1.3 oster */ 225 1.3 oster if (asmap->parityInfo->next) { 226 1.3 oster if (failedPDA == asmap->parityInfo) { 227 1.3 oster failedPDA->next->col = failedPDA->col; 228 1.3 oster } else { 229 1.3 oster if (failedPDA == asmap->parityInfo->next) { 230 1.3 oster asmap->parityInfo->col = failedPDA->col; 231 1.3 oster } 232 1.3 oster } 233 1.3 oster } 234 1.23 oster #if RF_DEBUG_DAG > 0 || RF_DEBUG_MAP > 0 235 1.3 oster if (rf_dagDebug || rf_mapDebug) { 236 1.14 oster printf("raid%d: Redirected type '%c' c %d o %ld -> c %d o %ld\n", 237 1.25 perry raidPtr->raidid, type, oc, 238 1.25 perry (long) oo, 239 1.5 oster failedPDA->col, 240 1.5 oster (long) failedPDA->startSector); 241 1.3 oster } 242 1.23 oster #endif 243 1.3 oster asmap->numDataFailed = asmap->numParityFailed = 0; 244 1.3 oster } 245 1.3 oster } 246 1.3 oster if (type == RF_IO_TYPE_READ) { 247 1.3 oster if (asmap->numDataFailed == 0) 248 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorIdleReadDAG; 249 1.3 oster else 250 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; 251 1.3 oster } else { 252 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; 253 1.3 oster } 254 1.1 oster } 255 1.1 oster 256 1.25 perry int 257 1.15 oster rf_VerifyParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, 258 1.15 oster RF_PhysDiskAddr_t *parityPDA, int correct_it, 259 1.15 oster RF_RaidAccessFlags_t flags) 260 1.1 oster { 261 1.5 oster int nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs; 262 1.13 oster RF_DagNode_t *blockNode, *wrBlock; 263 1.3 oster RF_DagHeader_t *rd_dag_h, *wr_dag_h; 264 1.3 oster RF_AccessStripeMapHeader_t *asm_h; 265 1.3 oster RF_AllocListElem_t *allocList; 266 1.19 oster #if RF_ACC_TRACE > 0 267 1.3 oster RF_AccTraceEntry_t tracerec; 268 1.19 oster #endif 269 1.3 oster RF_ReconUnitNum_t which_ru; 270 1.3 oster RF_RaidLayout_t *layoutPtr; 271 1.3 oster RF_AccessStripeMap_t *aasm; 272 1.3 oster RF_SectorCount_t nsector; 273 1.3 oster RF_RaidAddr_t startAddr; 274 1.26 christos char *bf, *buf1, *buf2; 275 1.3 oster RF_PhysDiskAddr_t *pda; 276 1.3 oster RF_StripeNum_t psID; 277 1.3 oster RF_MCPair_t *mcpair; 278 1.3 oster 279 1.3 oster layoutPtr = &raidPtr->Layout; 280 1.3 oster startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); 281 1.3 oster nsector = parityPDA->numSector; 282 1.3 oster nbytes = rf_RaidAddressToByte(raidPtr, nsector); 283 1.3 oster psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); 284 1.3 oster 285 1.3 oster asm_h = NULL; 286 1.3 oster rd_dag_h = wr_dag_h = NULL; 287 1.3 oster mcpair = NULL; 288 1.3 oster 289 1.3 oster ret = RF_PARITY_COULD_NOT_VERIFY; 290 1.3 oster 291 1.3 oster rf_MakeAllocList(allocList); 292 1.3 oster if (allocList == NULL) 293 1.3 oster return (RF_PARITY_COULD_NOT_VERIFY); 294 1.38 oster mcpair = rf_AllocMCPair(raidPtr); 295 1.3 oster if (mcpair == NULL) 296 1.3 oster goto done; 297 1.3 oster RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol); 298 1.3 oster stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; 299 1.3 oster bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol); 300 1.36 christos bf = RF_MallocAndAdd(bcount, allocList); 301 1.26 christos if (bf == NULL) 302 1.3 oster goto done; 303 1.10 oster #if RF_DEBUG_VERIFYPARITY 304 1.3 oster if (rf_verifyParityDebug) { 305 1.5 oster printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n", 306 1.26 christos raidPtr->raidid, (long) bf, bcount, (long) bf, 307 1.26 christos (long) bf + bcount); 308 1.3 oster } 309 1.10 oster #endif 310 1.3 oster /* 311 1.3 oster * Generate a DAG which will read the entire stripe- then we can 312 1.3 oster * just compare data chunks versus "parity" chunks. 313 1.3 oster */ 314 1.3 oster 315 1.26 christos rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, bf, 316 1.3 oster rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags, 317 1.3 oster RF_IO_NORMAL_PRIORITY); 318 1.3 oster if (rd_dag_h == NULL) 319 1.3 oster goto done; 320 1.3 oster blockNode = rd_dag_h->succedents[0]; 321 1.3 oster 322 1.3 oster /* 323 1.3 oster * Map the access to physical disk addresses (PDAs)- this will 324 1.3 oster * get us both a list of data addresses, and "parity" addresses 325 1.3 oster * (which are really mirror copies). 326 1.3 oster */ 327 1.3 oster asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, 328 1.26 christos bf, RF_DONT_REMAP); 329 1.3 oster aasm = asm_h->stripeMap; 330 1.3 oster 331 1.26 christos buf1 = bf; 332 1.3 oster /* 333 1.3 oster * Loop through the data blocks, setting up read nodes for each. 334 1.3 oster */ 335 1.3 oster for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { 336 1.3 oster RF_ASSERT(pda); 337 1.3 oster 338 1.3 oster rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); 339 1.3 oster 340 1.3 oster RF_ASSERT(pda->numSector != 0); 341 1.3 oster if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { 342 1.3 oster /* cannot verify parity with dead disk */ 343 1.3 oster goto done; 344 1.3 oster } 345 1.3 oster pda->bufPtr = buf1; 346 1.3 oster blockNode->succedents[i]->params[0].p = pda; 347 1.3 oster blockNode->succedents[i]->params[1].p = buf1; 348 1.3 oster blockNode->succedents[i]->params[2].v = psID; 349 1.17 oster blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 350 1.3 oster buf1 += nbytes; 351 1.3 oster } 352 1.3 oster RF_ASSERT(pda == NULL); 353 1.3 oster /* 354 1.3 oster * keep i, buf1 running 355 1.3 oster * 356 1.3 oster * Loop through parity blocks, setting up read nodes for each. 357 1.3 oster */ 358 1.3 oster for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++, pda = pda->next) { 359 1.3 oster RF_ASSERT(pda); 360 1.3 oster rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); 361 1.3 oster RF_ASSERT(pda->numSector != 0); 362 1.3 oster if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { 363 1.3 oster /* cannot verify parity with dead disk */ 364 1.3 oster goto done; 365 1.3 oster } 366 1.3 oster pda->bufPtr = buf1; 367 1.3 oster blockNode->succedents[i]->params[0].p = pda; 368 1.3 oster blockNode->succedents[i]->params[1].p = buf1; 369 1.3 oster blockNode->succedents[i]->params[2].v = psID; 370 1.17 oster blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 371 1.3 oster buf1 += nbytes; 372 1.3 oster } 373 1.3 oster RF_ASSERT(pda == NULL); 374 1.3 oster 375 1.19 oster #if RF_ACC_TRACE > 0 376 1.36 christos memset(&tracerec, 0, sizeof(tracerec)); 377 1.3 oster rd_dag_h->tracerec = &tracerec; 378 1.19 oster #endif 379 1.9 oster #if 0 380 1.3 oster if (rf_verifyParityDebug > 1) { 381 1.25 perry printf("raid%d: RAID1 parity verify read dag:\n", 382 1.5 oster raidPtr->raidid); 383 1.3 oster rf_PrintDAGList(rd_dag_h); 384 1.3 oster } 385 1.9 oster #endif 386 1.33 mrg RF_LOCK_MCPAIR(mcpair); 387 1.3 oster mcpair->flag = 0; 388 1.33 mrg RF_UNLOCK_MCPAIR(mcpair); 389 1.18 oster 390 1.3 oster rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 391 1.3 oster (void *) mcpair); 392 1.18 oster 393 1.33 mrg RF_LOCK_MCPAIR(mcpair); 394 1.3 oster while (mcpair->flag == 0) { 395 1.3 oster RF_WAIT_MCPAIR(mcpair); 396 1.3 oster } 397 1.33 mrg RF_UNLOCK_MCPAIR(mcpair); 398 1.3 oster 399 1.3 oster if (rd_dag_h->status != rf_enable) { 400 1.3 oster RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n"); 401 1.3 oster ret = RF_PARITY_COULD_NOT_VERIFY; 402 1.3 oster goto done; 403 1.3 oster } 404 1.3 oster /* 405 1.3 oster * buf1 is the beginning of the data blocks chunk 406 1.3 oster * buf2 is the beginning of the parity blocks chunk 407 1.3 oster */ 408 1.26 christos buf1 = bf; 409 1.26 christos buf2 = bf + (nbytes * layoutPtr->numDataCol); 410 1.3 oster ret = RF_PARITY_OKAY; 411 1.3 oster /* 412 1.3 oster * bbufs is "bad bufs"- an array whose entries are the data 413 1.3 oster * column numbers where we had miscompares. (That is, column 0 414 1.3 oster * and column 1 of the array are mirror copies, and are considered 415 1.3 oster * "data column 0" for this purpose). 416 1.3 oster */ 417 1.36 christos bbufs = RF_MallocAndAdd(layoutPtr->numParityCol * sizeof(*bbufs), 418 1.3 oster allocList); 419 1.3 oster nbad = 0; 420 1.3 oster /* 421 1.3 oster * Check data vs "parity" (mirror copy). 422 1.3 oster */ 423 1.3 oster for (i = 0; i < layoutPtr->numDataCol; i++) { 424 1.10 oster #if RF_DEBUG_VERIFYPARITY 425 1.3 oster if (rf_verifyParityDebug) { 426 1.5 oster printf("raid%d: RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n", 427 1.25 perry raidPtr->raidid, nbytes, i, (long) buf1, 428 1.26 christos (long) buf2, (long) bf); 429 1.3 oster } 430 1.10 oster #endif 431 1.7 thorpej ret = memcmp(buf1, buf2, nbytes); 432 1.3 oster if (ret) { 433 1.10 oster #if RF_DEBUG_VERIFYPARITY 434 1.3 oster if (rf_verifyParityDebug > 1) { 435 1.3 oster for (j = 0; j < nbytes; j++) { 436 1.3 oster if (buf1[j] != buf2[j]) 437 1.3 oster break; 438 1.3 oster } 439 1.3 oster printf("psid=%ld j=%d\n", (long) psID, j); 440 1.3 oster printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0] & 0xff, 441 1.3 oster buf1[1] & 0xff, buf1[2] & 0xff, buf1[3] & 0xff, buf1[4] & 0xff); 442 1.3 oster printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0] & 0xff, 443 1.3 oster buf2[1] & 0xff, buf2[2] & 0xff, buf2[3] & 0xff, buf2[4] & 0xff); 444 1.3 oster } 445 1.3 oster if (rf_verifyParityDebug) { 446 1.5 oster printf("raid%d: RAID1: found bad parity, i=%d\n", raidPtr->raidid, i); 447 1.3 oster } 448 1.10 oster #endif 449 1.3 oster /* 450 1.3 oster * Parity is bad. Keep track of which columns were bad. 451 1.3 oster */ 452 1.3 oster if (bbufs) 453 1.3 oster bbufs[nbad] = i; 454 1.3 oster nbad++; 455 1.3 oster ret = RF_PARITY_BAD; 456 1.3 oster } 457 1.3 oster buf1 += nbytes; 458 1.3 oster buf2 += nbytes; 459 1.3 oster } 460 1.3 oster 461 1.3 oster if ((ret != RF_PARITY_OKAY) && correct_it) { 462 1.3 oster ret = RF_PARITY_COULD_NOT_CORRECT; 463 1.10 oster #if RF_DEBUG_VERIFYPARITY 464 1.3 oster if (rf_verifyParityDebug) { 465 1.5 oster printf("raid%d: RAID1 parity verify: parity not correct\n", raidPtr->raidid); 466 1.3 oster } 467 1.10 oster #endif 468 1.3 oster if (bbufs == NULL) 469 1.3 oster goto done; 470 1.3 oster /* 471 1.3 oster * Make a DAG with one write node for each bad unit. We'll simply 472 1.3 oster * write the contents of the data unit onto the parity unit for 473 1.3 oster * correction. (It's possible that the mirror copy was the correct 474 1.3 oster * copy, and that we're spooging good data by writing bad over it, 475 1.3 oster * but there's no way we can know that. 476 1.3 oster */ 477 1.26 christos wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, bf, 478 1.3 oster rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags, 479 1.3 oster RF_IO_NORMAL_PRIORITY); 480 1.3 oster if (wr_dag_h == NULL) 481 1.3 oster goto done; 482 1.3 oster wrBlock = wr_dag_h->succedents[0]; 483 1.3 oster /* 484 1.3 oster * Fill in a write node for each bad compare. 485 1.3 oster */ 486 1.3 oster for (i = 0; i < nbad; i++) { 487 1.3 oster j = i + layoutPtr->numDataCol; 488 1.3 oster pda = blockNode->succedents[j]->params[0].p; 489 1.3 oster pda->bufPtr = blockNode->succedents[i]->params[1].p; 490 1.3 oster wrBlock->succedents[i]->params[0].p = pda; 491 1.3 oster wrBlock->succedents[i]->params[1].p = pda->bufPtr; 492 1.3 oster wrBlock->succedents[i]->params[2].v = psID; 493 1.32 oster wrBlock->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 494 1.3 oster } 495 1.19 oster #if RF_ACC_TRACE > 0 496 1.36 christos memset(&tracerec, 0, sizeof(tracerec)); 497 1.3 oster wr_dag_h->tracerec = &tracerec; 498 1.19 oster #endif 499 1.9 oster #if 0 500 1.3 oster if (rf_verifyParityDebug > 1) { 501 1.3 oster printf("Parity verify write dag:\n"); 502 1.3 oster rf_PrintDAGList(wr_dag_h); 503 1.3 oster } 504 1.9 oster #endif 505 1.33 mrg RF_LOCK_MCPAIR(mcpair); 506 1.3 oster mcpair->flag = 0; 507 1.33 mrg RF_UNLOCK_MCPAIR(mcpair); 508 1.20 oster 509 1.3 oster /* fire off the write DAG */ 510 1.3 oster rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 511 1.3 oster (void *) mcpair); 512 1.20 oster 513 1.33 mrg RF_LOCK_MCPAIR(mcpair); 514 1.3 oster while (!mcpair->flag) { 515 1.33 mrg RF_WAIT_MCPAIR(mcpair); 516 1.3 oster } 517 1.33 mrg RF_UNLOCK_MCPAIR(mcpair); 518 1.3 oster if (wr_dag_h->status != rf_enable) { 519 1.3 oster RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n"); 520 1.3 oster goto done; 521 1.3 oster } 522 1.3 oster ret = RF_PARITY_CORRECTED; 523 1.3 oster } 524 1.1 oster done: 525 1.3 oster /* 526 1.3 oster * All done. We might've gotten here without doing part of the function, 527 1.3 oster * so cleanup what we have to and return our running status. 528 1.3 oster */ 529 1.3 oster if (asm_h) 530 1.38 oster rf_FreeAccessStripeMap(raidPtr, asm_h); 531 1.3 oster if (rd_dag_h) 532 1.3 oster rf_FreeDAG(rd_dag_h); 533 1.3 oster if (wr_dag_h) 534 1.3 oster rf_FreeDAG(wr_dag_h); 535 1.3 oster if (mcpair) 536 1.38 oster rf_FreeMCPair(raidPtr, mcpair); 537 1.3 oster rf_FreeAllocList(allocList); 538 1.10 oster #if RF_DEBUG_VERIFYPARITY 539 1.3 oster if (rf_verifyParityDebug) { 540 1.25 perry printf("raid%d: RAID1 parity verify, returning %d\n", 541 1.5 oster raidPtr->raidid, ret); 542 1.3 oster } 543 1.10 oster #endif 544 1.3 oster return (ret); 545 1.1 oster } 546 1.1 oster 547 1.25 perry /* rbuf - the recon buffer to submit 548 1.15 oster * keep_it - whether we can keep this buffer or we have to return it 549 1.15 oster * use_committed - whether to use a committed or an available recon buffer 550 1.15 oster */ 551 1.15 oster 552 1.25 perry int 553 1.15 oster rf_SubmitReconBufferRAID1(RF_ReconBuffer_t *rbuf, int keep_it, 554 1.15 oster int use_committed) 555 1.1 oster { 556 1.3 oster RF_ReconParityStripeStatus_t *pssPtr; 557 1.3 oster RF_ReconCtrl_t *reconCtrlPtr; 558 1.21 oster int retcode; 559 1.37 christos RF_CallbackValueDesc_t *cb, *p; 560 1.3 oster RF_ReconBuffer_t *t; 561 1.3 oster RF_Raid_t *raidPtr; 562 1.30 christos void *ta; 563 1.3 oster 564 1.3 oster retcode = 0; 565 1.3 oster 566 1.3 oster raidPtr = rbuf->raidPtr; 567 1.14 oster reconCtrlPtr = raidPtr->reconControl; 568 1.3 oster 569 1.3 oster RF_ASSERT(rbuf); 570 1.3 oster RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); 571 1.3 oster 572 1.11 oster #if RF_DEBUG_RECON 573 1.3 oster if (rf_reconbufferDebug) { 574 1.14 oster printf("raid%d: RAID1 reconbuffer submission c%d psid %ld ru%d (failed offset %ld)\n", 575 1.25 perry raidPtr->raidid, rbuf->col, 576 1.5 oster (long) rbuf->parityStripeID, rbuf->which_ru, 577 1.5 oster (long) rbuf->failedDiskSectorOffset); 578 1.3 oster } 579 1.10 oster #endif 580 1.3 oster if (rf_reconDebug) { 581 1.30 christos unsigned char *b = rbuf->buffer; 582 1.3 oster printf("RAID1 reconbuffer submit psid %ld buf %lx\n", 583 1.3 oster (long) rbuf->parityStripeID, (long) rbuf->buffer); 584 1.3 oster printf("RAID1 psid %ld %02x %02x %02x %02x %02x\n", 585 1.30 christos (long)rbuf->parityStripeID, b[0], b[1], b[2], b[3], b[4]); 586 1.3 oster } 587 1.14 oster RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID); 588 1.3 oster 589 1.34 mrg rf_lock_mutex2(reconCtrlPtr->rb_mutex); 590 1.24 oster while(reconCtrlPtr->rb_lock) { 591 1.34 mrg rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex); 592 1.24 oster } 593 1.24 oster reconCtrlPtr->rb_lock = 1; 594 1.34 mrg rf_unlock_mutex2(reconCtrlPtr->rb_mutex); 595 1.3 oster 596 1.3 oster pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, 597 1.21 oster rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, NULL); 598 1.3 oster RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten 599 1.3 oster * an rbuf for it */ 600 1.3 oster 601 1.3 oster /* 602 1.3 oster * Since this is simple mirroring, the first submission for a stripe is also 603 1.3 oster * treated as the last. 604 1.3 oster */ 605 1.3 oster 606 1.3 oster t = NULL; 607 1.3 oster if (keep_it) { 608 1.11 oster #if RF_DEBUG_RECON 609 1.3 oster if (rf_reconbufferDebug) { 610 1.25 perry printf("raid%d: RAID1 rbuf submission: keeping rbuf\n", 611 1.5 oster raidPtr->raidid); 612 1.3 oster } 613 1.10 oster #endif 614 1.3 oster t = rbuf; 615 1.3 oster } else { 616 1.3 oster if (use_committed) { 617 1.11 oster #if RF_DEBUG_RECON 618 1.3 oster if (rf_reconbufferDebug) { 619 1.5 oster printf("raid%d: RAID1 rbuf submission: using committed rbuf\n", raidPtr->raidid); 620 1.3 oster } 621 1.10 oster #endif 622 1.3 oster t = reconCtrlPtr->committedRbufs; 623 1.3 oster RF_ASSERT(t); 624 1.3 oster reconCtrlPtr->committedRbufs = t->next; 625 1.3 oster t->next = NULL; 626 1.3 oster } else 627 1.3 oster if (reconCtrlPtr->floatingRbufs) { 628 1.11 oster #if RF_DEBUG_RECON 629 1.3 oster if (rf_reconbufferDebug) { 630 1.5 oster printf("raid%d: RAID1 rbuf submission: using floating rbuf\n", raidPtr->raidid); 631 1.3 oster } 632 1.10 oster #endif 633 1.3 oster t = reconCtrlPtr->floatingRbufs; 634 1.3 oster reconCtrlPtr->floatingRbufs = t->next; 635 1.3 oster t->next = NULL; 636 1.3 oster } 637 1.3 oster } 638 1.3 oster if (t == NULL) { 639 1.11 oster #if RF_DEBUG_RECON 640 1.3 oster if (rf_reconbufferDebug) { 641 1.5 oster printf("raid%d: RAID1 rbuf submission: waiting for rbuf\n", raidPtr->raidid); 642 1.3 oster } 643 1.10 oster #endif 644 1.3 oster RF_ASSERT((keep_it == 0) && (use_committed == 0)); 645 1.3 oster raidPtr->procsInBufWait++; 646 1.3 oster if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1)) 647 1.3 oster && (raidPtr->numFullReconBuffers == 0)) { 648 1.3 oster /* ruh-ro */ 649 1.3 oster RF_ERRORMSG("Buffer wait deadlock\n"); 650 1.14 oster rf_PrintPSStatusTable(raidPtr); 651 1.3 oster RF_PANIC(); 652 1.3 oster } 653 1.3 oster pssPtr->flags |= RF_PSS_BUFFERWAIT; 654 1.38 oster cb = rf_AllocCallbackValueDesc(raidPtr); 655 1.3 oster cb->col = rbuf->col; 656 1.37 christos cb->v = rbuf->parityStripeID; 657 1.3 oster cb->next = NULL; 658 1.3 oster if (reconCtrlPtr->bufferWaitList == NULL) { 659 1.3 oster /* we are the wait list- lucky us */ 660 1.3 oster reconCtrlPtr->bufferWaitList = cb; 661 1.3 oster } else { 662 1.3 oster /* append to wait list */ 663 1.3 oster for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); 664 1.3 oster p->next = cb; 665 1.3 oster } 666 1.3 oster retcode = 1; 667 1.3 oster goto out; 668 1.3 oster } 669 1.3 oster if (t != rbuf) { 670 1.3 oster t->col = reconCtrlPtr->fcol; 671 1.3 oster t->parityStripeID = rbuf->parityStripeID; 672 1.3 oster t->which_ru = rbuf->which_ru; 673 1.3 oster t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; 674 1.3 oster t->spCol = rbuf->spCol; 675 1.3 oster t->spOffset = rbuf->spOffset; 676 1.3 oster /* Swap buffers. DANCE! */ 677 1.3 oster ta = t->buffer; 678 1.3 oster t->buffer = rbuf->buffer; 679 1.3 oster rbuf->buffer = ta; 680 1.3 oster } 681 1.3 oster /* 682 1.3 oster * Use the rbuf we've been given as the target. 683 1.3 oster */ 684 1.3 oster RF_ASSERT(pssPtr->rbuf == NULL); 685 1.3 oster pssPtr->rbuf = t; 686 1.3 oster 687 1.3 oster t->count = 1; 688 1.3 oster /* 689 1.3 oster * Below, we use 1 for numDataCol (which is equal to the count in the 690 1.3 oster * previous line), so we'll always be done. 691 1.3 oster */ 692 1.3 oster rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1); 693 1.1 oster 694 1.1 oster out: 695 1.14 oster RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID); 696 1.34 mrg rf_lock_mutex2(reconCtrlPtr->rb_mutex); 697 1.24 oster reconCtrlPtr->rb_lock = 0; 698 1.34 mrg rf_broadcast_cond2(reconCtrlPtr->rb_cv); 699 1.34 mrg rf_unlock_mutex2(reconCtrlPtr->rb_mutex); 700 1.11 oster #if RF_DEBUG_RECON 701 1.3 oster if (rf_reconbufferDebug) { 702 1.25 perry printf("raid%d: RAID1 rbuf submission: returning %d\n", 703 1.5 oster raidPtr->raidid, retcode); 704 1.3 oster } 705 1.10 oster #endif 706 1.3 oster return (retcode); 707 1.1 oster } 708 1.31 oster 709 1.31 oster RF_HeadSepLimit_t 710 1.31 oster rf_GetDefaultHeadSepLimitRAID1(RF_Raid_t *raidPtr) 711 1.31 oster { 712 1.31 oster return (10); 713 1.31 oster } 714 1.31 oster 715