rf_chaindecluster.c revision 1.6.4.1 1 1.6.4.1 thorpej /* $NetBSD: rf_chaindecluster.c,v 1.6.4.1 2002/01/10 19:57:37 thorpej Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Khalil Amiri
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /******************************************************************************
30 1.1 oster *
31 1.1 oster * rf_chaindecluster.c -- implements chained declustering
32 1.1 oster *
33 1.1 oster *****************************************************************************/
34 1.1 oster
35 1.6.4.1 thorpej #include <sys/cdefs.h>
36 1.6.4.1 thorpej __KERNEL_RCSID(0, "$NetBSD: rf_chaindecluster.c,v 1.6.4.1 2002/01/10 19:57:37 thorpej Exp $");
37 1.6.4.1 thorpej
38 1.1 oster #include "rf_archs.h"
39 1.6 oster
40 1.6 oster #if (RF_INCLUDE_CHAINDECLUSTER > 0)
41 1.6 oster
42 1.6.4.1 thorpej #include <dev/raidframe/raidframevar.h>
43 1.6.4.1 thorpej
44 1.1 oster #include "rf_raid.h"
45 1.1 oster #include "rf_chaindecluster.h"
46 1.1 oster #include "rf_dag.h"
47 1.1 oster #include "rf_dagutils.h"
48 1.1 oster #include "rf_dagffrd.h"
49 1.1 oster #include "rf_dagffwr.h"
50 1.1 oster #include "rf_dagdegrd.h"
51 1.1 oster #include "rf_dagfuncs.h"
52 1.1 oster #include "rf_general.h"
53 1.1 oster #include "rf_utils.h"
54 1.1 oster
55 1.1 oster typedef struct RF_ChaindeclusterConfigInfo_s {
56 1.3 oster RF_RowCol_t **stripeIdentifier; /* filled in at config time and used
57 1.3 oster * by IdentifyStripe */
58 1.3 oster RF_StripeCount_t numSparingRegions;
59 1.3 oster RF_StripeCount_t stripeUnitsPerSparingRegion;
60 1.3 oster RF_SectorNum_t mirrorStripeOffset;
61 1.3 oster } RF_ChaindeclusterConfigInfo_t;
62 1.3 oster
63 1.3 oster int
64 1.3 oster rf_ConfigureChainDecluster(
65 1.3 oster RF_ShutdownList_t ** listp,
66 1.3 oster RF_Raid_t * raidPtr,
67 1.3 oster RF_Config_t * cfgPtr)
68 1.1 oster {
69 1.3 oster RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
70 1.3 oster RF_StripeCount_t num_used_stripeUnitsPerDisk;
71 1.3 oster RF_ChaindeclusterConfigInfo_t *info;
72 1.3 oster RF_RowCol_t i;
73 1.3 oster
74 1.3 oster /* create a Chained Declustering configuration structure */
75 1.3 oster RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList);
76 1.3 oster if (info == NULL)
77 1.3 oster return (ENOMEM);
78 1.3 oster layoutPtr->layoutSpecificInfo = (void *) info;
79 1.3 oster
80 1.3 oster /* fill in the config structure. */
81 1.3 oster info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList);
82 1.3 oster if (info->stripeIdentifier == NULL)
83 1.3 oster return (ENOMEM);
84 1.3 oster for (i = 0; i < raidPtr->numCol; i++) {
85 1.3 oster info->stripeIdentifier[i][0] = i % raidPtr->numCol;
86 1.3 oster info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol;
87 1.3 oster }
88 1.3 oster
89 1.3 oster RF_ASSERT(raidPtr->numRow == 1);
90 1.3 oster
91 1.3 oster /* fill in the remaining layout parameters */
92 1.3 oster num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk %
93 1.3 oster (2 * raidPtr->numCol - 2));
94 1.3 oster info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2);
95 1.3 oster info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
96 1.3 oster info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1);
97 1.3 oster layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
98 1.3 oster layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
99 1.3 oster layoutPtr->numDataCol = 1;
100 1.3 oster layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
101 1.3 oster layoutPtr->numParityCol = 1;
102 1.3 oster
103 1.3 oster layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;
104 1.3 oster
105 1.3 oster raidPtr->sectorsPerDisk =
106 1.3 oster num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
107 1.1 oster
108 1.3 oster raidPtr->totalSectors =
109 1.3 oster (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;
110 1.1 oster
111 1.3 oster layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
112 1.1 oster
113 1.3 oster return (0);
114 1.1 oster }
115 1.1 oster
116 1.3 oster RF_ReconUnitCount_t
117 1.3 oster rf_GetNumSpareRUsChainDecluster(raidPtr)
118 1.3 oster RF_Raid_t *raidPtr;
119 1.1 oster {
120 1.3 oster RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
121 1.1 oster
122 1.3 oster /*
123 1.3 oster * The layout uses two stripe units per disk as spare within each
124 1.3 oster * sparing region.
125 1.3 oster */
126 1.3 oster return (2 * info->numSparingRegions);
127 1.1 oster }
128 1.1 oster
129 1.1 oster
130 1.1 oster /* Maps to the primary copy of the data, i.e. the first mirror pair */
131 1.3 oster void
132 1.3 oster rf_MapSectorChainDecluster(
133 1.3 oster RF_Raid_t * raidPtr,
134 1.3 oster RF_RaidAddr_t raidSector,
135 1.3 oster RF_RowCol_t * row,
136 1.3 oster RF_RowCol_t * col,
137 1.3 oster RF_SectorNum_t * diskSector,
138 1.3 oster int remap)
139 1.1 oster {
140 1.3 oster RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
141 1.3 oster RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
142 1.3 oster RF_SectorNum_t index_within_region, index_within_disk;
143 1.3 oster RF_StripeNum_t sparing_region_id;
144 1.3 oster int col_before_remap;
145 1.3 oster
146 1.3 oster *row = 0;
147 1.3 oster sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
148 1.3 oster index_within_region = SUID % info->stripeUnitsPerSparingRegion;
149 1.3 oster index_within_disk = index_within_region / raidPtr->numCol;
150 1.3 oster col_before_remap = SUID % raidPtr->numCol;
151 1.3 oster
152 1.3 oster if (!remap) {
153 1.3 oster *col = col_before_remap;
154 1.3 oster *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) *
155 1.3 oster raidPtr->Layout.sectorsPerStripeUnit;
156 1.3 oster *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
157 1.3 oster } else {
158 1.3 oster /* remap sector to spare space... */
159 1.3 oster *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit;
160 1.3 oster *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit;
161 1.3 oster *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
162 1.3 oster index_within_disk = index_within_region / raidPtr->numCol;
163 1.3 oster if (index_within_disk < col_before_remap)
164 1.3 oster *col = index_within_disk;
165 1.3 oster else
166 1.3 oster if (index_within_disk == raidPtr->numCol - 2) {
167 1.3 oster *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol;
168 1.3 oster *diskSector += raidPtr->Layout.sectorsPerStripeUnit;
169 1.3 oster } else
170 1.3 oster *col = (index_within_disk + 2) % raidPtr->numCol;
171 1.3 oster }
172 1.1 oster
173 1.1 oster }
174 1.1 oster
175 1.1 oster
176 1.1 oster
177 1.1 oster /* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained
178 1.3 oster in the next disk (mod numCol) after the disk containing the primary copy.
179 1.1 oster The offset into the disk is one-half disk down */
180 1.3 oster void
181 1.3 oster rf_MapParityChainDecluster(
182 1.3 oster RF_Raid_t * raidPtr,
183 1.3 oster RF_RaidAddr_t raidSector,
184 1.3 oster RF_RowCol_t * row,
185 1.3 oster RF_RowCol_t * col,
186 1.3 oster RF_SectorNum_t * diskSector,
187 1.3 oster int remap)
188 1.1 oster {
189 1.3 oster RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
190 1.3 oster RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
191 1.3 oster RF_SectorNum_t index_within_region, index_within_disk;
192 1.3 oster RF_StripeNum_t sparing_region_id;
193 1.3 oster int col_before_remap;
194 1.3 oster
195 1.3 oster *row = 0;
196 1.3 oster if (!remap) {
197 1.3 oster *col = SUID % raidPtr->numCol;
198 1.3 oster *col = (*col + 1) % raidPtr->numCol;
199 1.3 oster *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit;
200 1.3 oster *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
201 1.3 oster *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
202 1.3 oster } else {
203 1.3 oster /* remap parity to spare space ... */
204 1.3 oster sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
205 1.3 oster index_within_region = SUID % info->stripeUnitsPerSparingRegion;
206 1.3 oster index_within_disk = index_within_region / raidPtr->numCol;
207 1.3 oster *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit;
208 1.3 oster *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
209 1.3 oster *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
210 1.3 oster col_before_remap = SUID % raidPtr->numCol;
211 1.3 oster if (index_within_disk < col_before_remap)
212 1.3 oster *col = index_within_disk;
213 1.3 oster else
214 1.3 oster if (index_within_disk == raidPtr->numCol - 2) {
215 1.3 oster *col = (col_before_remap + 2) % raidPtr->numCol;
216 1.3 oster *diskSector -= raidPtr->Layout.sectorsPerStripeUnit;
217 1.3 oster } else
218 1.3 oster *col = (index_within_disk + 2) % raidPtr->numCol;
219 1.3 oster }
220 1.1 oster
221 1.1 oster }
222 1.1 oster
223 1.3 oster void
224 1.3 oster rf_IdentifyStripeChainDecluster(
225 1.3 oster RF_Raid_t * raidPtr,
226 1.3 oster RF_RaidAddr_t addr,
227 1.3 oster RF_RowCol_t ** diskids,
228 1.3 oster RF_RowCol_t * outRow)
229 1.1 oster {
230 1.3 oster RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
231 1.3 oster RF_StripeNum_t SUID;
232 1.3 oster RF_RowCol_t col;
233 1.3 oster
234 1.3 oster SUID = addr / raidPtr->Layout.sectorsPerStripeUnit;
235 1.3 oster col = SUID % raidPtr->numCol;
236 1.3 oster *outRow = 0;
237 1.3 oster *diskids = info->stripeIdentifier[col];
238 1.1 oster }
239 1.1 oster
240 1.3 oster void
241 1.3 oster rf_MapSIDToPSIDChainDecluster(
242 1.3 oster RF_RaidLayout_t * layoutPtr,
243 1.3 oster RF_StripeNum_t stripeID,
244 1.3 oster RF_StripeNum_t * psID,
245 1.3 oster RF_ReconUnitNum_t * which_ru)
246 1.1 oster {
247 1.3 oster *which_ru = 0;
248 1.3 oster *psID = stripeID;
249 1.1 oster }
250 1.1 oster /******************************************************************************
251 1.1 oster * select a graph to perform a single-stripe access
252 1.1 oster *
253 1.1 oster * Parameters: raidPtr - description of the physical array
254 1.1 oster * type - type of operation (read or write) requested
255 1.1 oster * asmap - logical & physical addresses for this access
256 1.1 oster * createFunc - function to use to create the graph (return value)
257 1.1 oster *****************************************************************************/
258 1.1 oster
259 1.3 oster void
260 1.3 oster rf_RAIDCDagSelect(
261 1.3 oster RF_Raid_t * raidPtr,
262 1.3 oster RF_IoType_t type,
263 1.3 oster RF_AccessStripeMap_t * asmap,
264 1.3 oster RF_VoidFuncPtr * createFunc)
265 1.1 oster #if 0
266 1.3 oster void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *,
267 1.3 oster RF_DagHeader_t *, void *, RF_RaidAccessFlags_t,
268 1.5 oster RF_AllocListElem_t *)
269 1.1 oster #endif
270 1.1 oster {
271 1.3 oster RF_ASSERT(RF_IO_IS_R_OR_W(type));
272 1.3 oster RF_ASSERT(raidPtr->numRow == 1);
273 1.1 oster
274 1.3 oster if (asmap->numDataFailed + asmap->numParityFailed > 1) {
275 1.3 oster RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
276 1.3 oster *createFunc = NULL;
277 1.3 oster return;
278 1.3 oster }
279 1.3 oster *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
280 1.3 oster
281 1.3 oster if (type == RF_IO_TYPE_READ) {
282 1.3 oster if ((raidPtr->status[0] == rf_rs_degraded) || (raidPtr->status[0] == rf_rs_reconstructing))
283 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is
284 1.3 oster * degraded, implement
285 1.3 oster * workload shifting */
286 1.3 oster else
287 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not
288 1.3 oster * degraded, so use
289 1.3 oster * mirror partition dag */
290 1.3 oster } else
291 1.3 oster *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
292 1.1 oster }
293 1.6 oster #endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */
294