1 1.16 christos /* $NetBSD: rf_chaindecluster.c,v 1.16 2019/02/09 03:33:59 christos Exp $ */ 2 1.1 oster /* 3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University. 4 1.1 oster * All rights reserved. 5 1.1 oster * 6 1.1 oster * Author: Khalil Amiri 7 1.1 oster * 8 1.1 oster * Permission to use, copy, modify and distribute this software and 9 1.1 oster * its documentation is hereby granted, provided that both the copyright 10 1.1 oster * notice and this permission notice appear in all copies of the 11 1.1 oster * software, derivative works or modified versions, and any portions 12 1.1 oster * thereof, and that both notices appear in supporting documentation. 13 1.1 oster * 14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 1.1 oster * 18 1.1 oster * Carnegie Mellon requests users of this software to return to 19 1.1 oster * 20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 21 1.1 oster * School of Computer Science 22 1.1 oster * Carnegie Mellon University 23 1.1 oster * Pittsburgh PA 15213-3890 24 1.1 oster * 25 1.1 oster * any improvements or extensions that they make and grant Carnegie the 26 1.1 oster * rights to redistribute these changes. 27 1.1 oster */ 28 1.1 oster 29 1.1 oster /****************************************************************************** 30 1.1 oster * 31 1.1 oster * rf_chaindecluster.c -- implements chained declustering 32 1.1 oster * 33 1.1 oster *****************************************************************************/ 34 1.8 lukem 35 1.8 lukem #include <sys/cdefs.h> 36 1.16 christos __KERNEL_RCSID(0, "$NetBSD: rf_chaindecluster.c,v 1.16 2019/02/09 03:33:59 christos Exp $"); 37 1.1 oster 38 1.1 oster #include "rf_archs.h" 39 1.6 oster 40 1.12 perry #if (RF_INCLUDE_CHAINDECLUSTER > 0) 41 1.6 oster 42 1.7 oster #include <dev/raidframe/raidframevar.h> 43 1.7 oster 44 1.1 oster #include "rf_raid.h" 45 1.1 oster #include "rf_chaindecluster.h" 46 1.1 oster #include "rf_dag.h" 47 1.1 oster #include "rf_dagutils.h" 48 1.1 oster #include "rf_dagffrd.h" 49 1.1 oster #include "rf_dagffwr.h" 50 1.1 oster #include "rf_dagdegrd.h" 51 1.1 oster #include "rf_dagfuncs.h" 52 1.1 oster #include "rf_general.h" 53 1.1 oster #include "rf_utils.h" 54 1.1 oster 55 1.1 oster typedef struct RF_ChaindeclusterConfigInfo_s { 56 1.3 oster RF_RowCol_t **stripeIdentifier; /* filled in at config time and used 57 1.3 oster * by IdentifyStripe */ 58 1.3 oster RF_StripeCount_t numSparingRegions; 59 1.3 oster RF_StripeCount_t stripeUnitsPerSparingRegion; 60 1.3 oster RF_SectorNum_t mirrorStripeOffset; 61 1.3 oster } RF_ChaindeclusterConfigInfo_t; 62 1.3 oster 63 1.12 perry int 64 1.15 christos rf_ConfigureChainDecluster(RF_ShutdownList_t **listp, 65 1.15 christos RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) 66 1.1 oster { 67 1.3 oster RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 68 1.3 oster RF_StripeCount_t num_used_stripeUnitsPerDisk; 69 1.3 oster RF_ChaindeclusterConfigInfo_t *info; 70 1.3 oster RF_RowCol_t i; 71 1.3 oster 72 1.3 oster /* create a Chained Declustering configuration structure */ 73 1.16 christos info = RF_MallocAndAdd(sizeof(*info), raidPtr->cleanupList); 74 1.3 oster if (info == NULL) 75 1.3 oster return (ENOMEM); 76 1.3 oster layoutPtr->layoutSpecificInfo = (void *) info; 77 1.3 oster 78 1.3 oster /* fill in the config structure. */ 79 1.3 oster info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList); 80 1.3 oster if (info->stripeIdentifier == NULL) 81 1.3 oster return (ENOMEM); 82 1.3 oster for (i = 0; i < raidPtr->numCol; i++) { 83 1.3 oster info->stripeIdentifier[i][0] = i % raidPtr->numCol; 84 1.3 oster info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol; 85 1.3 oster } 86 1.3 oster 87 1.3 oster /* fill in the remaining layout parameters */ 88 1.3 oster num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % 89 1.3 oster (2 * raidPtr->numCol - 2)); 90 1.3 oster info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2); 91 1.3 oster info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); 92 1.3 oster info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1); 93 1.3 oster layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; 94 1.3 oster layoutPtr->numDataCol = 1; 95 1.3 oster layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; 96 1.3 oster layoutPtr->numParityCol = 1; 97 1.3 oster 98 1.3 oster layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; 99 1.3 oster 100 1.3 oster raidPtr->sectorsPerDisk = 101 1.3 oster num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; 102 1.1 oster 103 1.3 oster raidPtr->totalSectors = 104 1.3 oster (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; 105 1.1 oster 106 1.3 oster layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; 107 1.1 oster 108 1.3 oster return (0); 109 1.1 oster } 110 1.1 oster 111 1.12 perry RF_ReconUnitCount_t 112 1.11 oster rf_GetNumSpareRUsChainDecluster(RF_Raid_t *raidPtr) 113 1.1 oster { 114 1.3 oster RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 115 1.1 oster 116 1.3 oster /* 117 1.3 oster * The layout uses two stripe units per disk as spare within each 118 1.3 oster * sparing region. 119 1.3 oster */ 120 1.3 oster return (2 * info->numSparingRegions); 121 1.1 oster } 122 1.1 oster 123 1.1 oster 124 1.1 oster /* Maps to the primary copy of the data, i.e. the first mirror pair */ 125 1.12 perry void 126 1.11 oster rf_MapSectorChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, 127 1.11 oster RF_RowCol_t *col, RF_SectorNum_t *diskSector, 128 1.11 oster int remap) 129 1.1 oster { 130 1.3 oster RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 131 1.3 oster RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 132 1.3 oster RF_SectorNum_t index_within_region, index_within_disk; 133 1.3 oster RF_StripeNum_t sparing_region_id; 134 1.3 oster int col_before_remap; 135 1.3 oster 136 1.3 oster sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; 137 1.3 oster index_within_region = SUID % info->stripeUnitsPerSparingRegion; 138 1.3 oster index_within_disk = index_within_region / raidPtr->numCol; 139 1.3 oster col_before_remap = SUID % raidPtr->numCol; 140 1.3 oster 141 1.3 oster if (!remap) { 142 1.3 oster *col = col_before_remap; 143 1.3 oster *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * 144 1.3 oster raidPtr->Layout.sectorsPerStripeUnit; 145 1.3 oster *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 146 1.3 oster } else { 147 1.3 oster /* remap sector to spare space... */ 148 1.3 oster *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; 149 1.3 oster *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; 150 1.3 oster *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 151 1.3 oster index_within_disk = index_within_region / raidPtr->numCol; 152 1.3 oster if (index_within_disk < col_before_remap) 153 1.3 oster *col = index_within_disk; 154 1.3 oster else 155 1.3 oster if (index_within_disk == raidPtr->numCol - 2) { 156 1.3 oster *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol; 157 1.3 oster *diskSector += raidPtr->Layout.sectorsPerStripeUnit; 158 1.3 oster } else 159 1.3 oster *col = (index_within_disk + 2) % raidPtr->numCol; 160 1.3 oster } 161 1.1 oster 162 1.1 oster } 163 1.1 oster 164 1.1 oster 165 1.1 oster 166 1.1 oster /* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained 167 1.3 oster in the next disk (mod numCol) after the disk containing the primary copy. 168 1.1 oster The offset into the disk is one-half disk down */ 169 1.12 perry void 170 1.11 oster rf_MapParityChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, 171 1.11 oster RF_RowCol_t *col, RF_SectorNum_t *diskSector, 172 1.11 oster int remap) 173 1.1 oster { 174 1.3 oster RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 175 1.3 oster RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 176 1.3 oster RF_SectorNum_t index_within_region, index_within_disk; 177 1.3 oster RF_StripeNum_t sparing_region_id; 178 1.3 oster int col_before_remap; 179 1.3 oster 180 1.3 oster if (!remap) { 181 1.3 oster *col = SUID % raidPtr->numCol; 182 1.3 oster *col = (*col + 1) % raidPtr->numCol; 183 1.3 oster *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; 184 1.3 oster *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; 185 1.3 oster *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 186 1.3 oster } else { 187 1.3 oster /* remap parity to spare space ... */ 188 1.3 oster sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; 189 1.3 oster index_within_region = SUID % info->stripeUnitsPerSparingRegion; 190 1.3 oster index_within_disk = index_within_region / raidPtr->numCol; 191 1.3 oster *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; 192 1.3 oster *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; 193 1.3 oster *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 194 1.3 oster col_before_remap = SUID % raidPtr->numCol; 195 1.3 oster if (index_within_disk < col_before_remap) 196 1.3 oster *col = index_within_disk; 197 1.3 oster else 198 1.3 oster if (index_within_disk == raidPtr->numCol - 2) { 199 1.3 oster *col = (col_before_remap + 2) % raidPtr->numCol; 200 1.3 oster *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; 201 1.3 oster } else 202 1.3 oster *col = (index_within_disk + 2) % raidPtr->numCol; 203 1.3 oster } 204 1.1 oster 205 1.1 oster } 206 1.1 oster 207 1.12 perry void 208 1.11 oster rf_IdentifyStripeChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, 209 1.11 oster RF_RowCol_t **diskids) 210 1.1 oster { 211 1.3 oster RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 212 1.3 oster RF_StripeNum_t SUID; 213 1.3 oster RF_RowCol_t col; 214 1.3 oster 215 1.3 oster SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; 216 1.3 oster col = SUID % raidPtr->numCol; 217 1.3 oster *diskids = info->stripeIdentifier[col]; 218 1.1 oster } 219 1.1 oster 220 1.12 perry void 221 1.15 christos rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *layoutPtr, 222 1.11 oster RF_StripeNum_t stripeID, 223 1.11 oster RF_StripeNum_t *psID, 224 1.11 oster RF_ReconUnitNum_t *which_ru) 225 1.1 oster { 226 1.3 oster *which_ru = 0; 227 1.3 oster *psID = stripeID; 228 1.1 oster } 229 1.1 oster /****************************************************************************** 230 1.1 oster * select a graph to perform a single-stripe access 231 1.1 oster * 232 1.1 oster * Parameters: raidPtr - description of the physical array 233 1.1 oster * type - type of operation (read or write) requested 234 1.1 oster * asmap - logical & physical addresses for this access 235 1.1 oster * createFunc - function to use to create the graph (return value) 236 1.1 oster *****************************************************************************/ 237 1.1 oster 238 1.12 perry void 239 1.11 oster rf_RAIDCDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, 240 1.11 oster RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) 241 1.1 oster #if 0 242 1.3 oster void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *, 243 1.3 oster RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, 244 1.5 oster RF_AllocListElem_t *) 245 1.1 oster #endif 246 1.1 oster { 247 1.3 oster RF_ASSERT(RF_IO_IS_R_OR_W(type)); 248 1.1 oster 249 1.3 oster if (asmap->numDataFailed + asmap->numParityFailed > 1) { 250 1.3 oster RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); 251 1.3 oster *createFunc = NULL; 252 1.3 oster return; 253 1.3 oster } 254 1.3 oster *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; 255 1.3 oster 256 1.3 oster if (type == RF_IO_TYPE_READ) { 257 1.10 oster if ((raidPtr->status == rf_rs_degraded) || (raidPtr->status == rf_rs_reconstructing)) 258 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is 259 1.3 oster * degraded, implement 260 1.3 oster * workload shifting */ 261 1.3 oster else 262 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not 263 1.3 oster * degraded, so use 264 1.3 oster * mirror partition dag */ 265 1.3 oster } else 266 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; 267 1.1 oster } 268 1.6 oster #endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ 269