rf_raid5.c revision 1.2 1 /* $NetBSD: rf_raid5.c,v 1.2 1999/01/26 02:34:01 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /******************************************************************************
30 *
31 * rf_raid5.c -- implements RAID Level 5
32 *
33 *****************************************************************************/
34
35 #include "rf_types.h"
36 #include "rf_raid.h"
37 #include "rf_raid5.h"
38 #include "rf_dag.h"
39 #include "rf_dagffrd.h"
40 #include "rf_dagffwr.h"
41 #include "rf_dagdegrd.h"
42 #include "rf_dagdegwr.h"
43 #include "rf_dagutils.h"
44 #include "rf_threadid.h"
45 #include "rf_general.h"
46 #include "rf_map.h"
47 #include "rf_utils.h"
48
49 typedef struct RF_Raid5ConfigInfo_s {
50 RF_RowCol_t **stripeIdentifier; /* filled in at config time and used by IdentifyStripe */
51 } RF_Raid5ConfigInfo_t;
52
53 int rf_ConfigureRAID5(
54 RF_ShutdownList_t **listp,
55 RF_Raid_t *raidPtr,
56 RF_Config_t *cfgPtr)
57 {
58 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
59 RF_Raid5ConfigInfo_t *info;
60 RF_RowCol_t i, j, startdisk;
61
62 /* create a RAID level 5 configuration structure */
63 RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList);
64 if (info == NULL)
65 return(ENOMEM);
66 layoutPtr->layoutSpecificInfo = (void *) info;
67
68 RF_ASSERT(raidPtr->numRow == 1);
69
70 /* the stripe identifier must identify the disks in each stripe,
71 * IN THE ORDER THAT THEY APPEAR IN THE STRIPE.
72 */
73 info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
74 if (info->stripeIdentifier == NULL)
75 return(ENOMEM);
76 startdisk = 0;
77 for (i=0; i<raidPtr->numCol; i++) {
78 for (j=0; j<raidPtr->numCol; j++) {
79 info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
80 }
81 if ((--startdisk) < 0) startdisk = raidPtr->numCol-1;
82 }
83
84 /* fill in the remaining layout parameters */
85 layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
86 layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
87 layoutPtr->numDataCol = raidPtr->numCol-1;
88 layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
89 layoutPtr->numParityCol = 1;
90 layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
91
92 raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
93
94 return(0);
95 }
96
97 int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *raidPtr)
98 {
99 return(20);
100 }
101
102 RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *raidPtr)
103 {
104 return(10);
105 }
106
107 #if !defined(__NetBSD__) && !defined(_KERNEL)
108 /* not currently used */
109 int rf_ShutdownRAID5(RF_Raid_t *raidPtr)
110 {
111 return(0);
112 }
113 #endif
114
115 void rf_MapSectorRAID5(
116 RF_Raid_t *raidPtr,
117 RF_RaidAddr_t raidSector,
118 RF_RowCol_t *row,
119 RF_RowCol_t *col,
120 RF_SectorNum_t *diskSector,
121 int remap)
122 {
123 RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
124 *row = 0;
125 *col = (SUID % raidPtr->numCol);
126 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
127 (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
128 }
129
130 void rf_MapParityRAID5(
131 RF_Raid_t *raidPtr,
132 RF_RaidAddr_t raidSector,
133 RF_RowCol_t *row,
134 RF_RowCol_t *col,
135 RF_SectorNum_t *diskSector,
136 int remap)
137 {
138 RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
139
140 *row = 0;
141 *col = raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%raidPtr->numCol;
142 *diskSector =(SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
143 (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
144 }
145
146 void rf_IdentifyStripeRAID5(
147 RF_Raid_t *raidPtr,
148 RF_RaidAddr_t addr,
149 RF_RowCol_t **diskids,
150 RF_RowCol_t *outRow)
151 {
152 RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
153 RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
154
155 *outRow = 0;
156 *diskids = info->stripeIdentifier[ stripeID % raidPtr->numCol ];
157 }
158
159 void rf_MapSIDToPSIDRAID5(
160 RF_RaidLayout_t *layoutPtr,
161 RF_StripeNum_t stripeID,
162 RF_StripeNum_t *psID,
163 RF_ReconUnitNum_t *which_ru)
164 {
165 *which_ru = 0;
166 *psID = stripeID;
167 }
168
169 /* select an algorithm for performing an access. Returns two pointers,
170 * one to a function that will return information about the DAG, and
171 * another to a function that will create the dag.
172 */
173 void rf_RaidFiveDagSelect(
174 RF_Raid_t *raidPtr,
175 RF_IoType_t type,
176 RF_AccessStripeMap_t *asmap,
177 RF_VoidFuncPtr *createFunc)
178 {
179 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
180 RF_PhysDiskAddr_t *failedPDA=NULL;
181 RF_RowCol_t frow, fcol;
182 RF_RowStatus_t rstat;
183 int prior_recon;
184 int tid;
185
186 RF_ASSERT(RF_IO_IS_R_OR_W(type));
187
188 if (asmap->numDataFailed + asmap->numParityFailed > 1) {
189 RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
190 /* *infoFunc = */ *createFunc = NULL;
191 return;
192 } else if (asmap->numDataFailed + asmap->numParityFailed == 1) {
193
194 /* if under recon & already reconstructed, redirect the access to the spare drive
195 * and eliminate the failure indication
196 */
197 failedPDA = asmap->failedPDAs[0];
198 frow = failedPDA->row; fcol = failedPDA->col;
199 rstat = raidPtr->status[failedPDA->row];
200 prior_recon = (rstat == rf_rs_reconfigured) || (
201 (rstat == rf_rs_reconstructing) ?
202 rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
203 );
204 if (prior_recon) {
205 RF_RowCol_t or = failedPDA->row,oc=failedPDA->col;
206 RF_SectorNum_t oo=failedPDA->startSector;
207
208 if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist spare space */
209
210 if (failedPDA == asmap->parityInfo) {
211
212 /* parity has failed */
213 (layoutPtr->map->MapParity)(raidPtr, failedPDA->raidAddress, &failedPDA->row,
214 &failedPDA->col, &failedPDA->startSector, RF_REMAP);
215
216 if (asmap->parityInfo->next) { /* redir 2nd component, if any */
217 RF_PhysDiskAddr_t *p = asmap->parityInfo->next;
218 RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit;
219 p->row = failedPDA->row;
220 p->col = failedPDA->col;
221 p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) +
222 SUoffs; /* cheating: startSector is not really a RAID address */
223 }
224
225 } else if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) {
226 RF_ASSERT(0); /* should not ever happen */
227 } else {
228
229 /* data has failed */
230 (layoutPtr->map->MapSector)(raidPtr, failedPDA->raidAddress, &failedPDA->row,
231 &failedPDA->col, &failedPDA->startSector, RF_REMAP);
232
233 }
234
235 } else { /* redirect to dedicated spare space */
236
237 failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
238 failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
239
240 /* the parity may have two distinct components, both of which may need to be redirected */
241 if (asmap->parityInfo->next) {
242 if (failedPDA == asmap->parityInfo) {
243 failedPDA->next->row = failedPDA->row;
244 failedPDA->next->col = failedPDA->col;
245 } else if (failedPDA == asmap->parityInfo->next) { /* paranoid: should never occur */
246 asmap->parityInfo->row = failedPDA->row;
247 asmap->parityInfo->col = failedPDA->col;
248 }
249 }
250 }
251
252 RF_ASSERT(failedPDA->col != -1);
253
254 if (rf_dagDebug || rf_mapDebug) {
255 rf_get_threadid(tid);
256 printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
257 tid,type,or,oc,(long)oo,failedPDA->row,failedPDA->col,
258 (long)failedPDA->startSector);
259 }
260
261 asmap->numDataFailed = asmap->numParityFailed = 0;
262 }
263
264 }
265
266 /* all dags begin/end with block/unblock node
267 * therefore, hdrSucc & termAnt counts should always be 1
268 * also, these counts should not be visible outside dag creation routines -
269 * manipulating the counts here should be removed */
270 if (type == RF_IO_TYPE_READ) {
271 if (asmap->numDataFailed == 0)
272 *createFunc = (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG;
273 else
274 *createFunc = (RF_VoidFuncPtr)rf_CreateRaidFiveDegradedReadDAG;
275 } else {
276
277
278 /* if mirroring, always use large writes. If the access requires two
279 * distinct parity updates, always do a small write. If the stripe
280 * contains a failure but the access does not, do a small write.
281 * The first conditional (numStripeUnitsAccessed <= numDataCol/2) uses a
282 * less-than-or-equal rather than just a less-than because when G is 3
283 * or 4, numDataCol/2 is 1, and I want single-stripe-unit updates to use
284 * just one disk.
285 */
286 if ( (asmap->numDataFailed + asmap->numParityFailed) == 0) {
287 if (rf_suppressLocksAndLargeWrites ||
288 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol!=1)) ||
289 (asmap->parityInfo->next!=NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
290 *createFunc = (RF_VoidFuncPtr)rf_CreateSmallWriteDAG;
291 }
292 else
293 *createFunc = (RF_VoidFuncPtr)rf_CreateLargeWriteDAG;
294 }
295 else {
296 if (asmap->numParityFailed == 1)
297 *createFunc = (RF_VoidFuncPtr)rf_CreateNonRedundantWriteDAG;
298 else
299 if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)
300 *createFunc = NULL;
301 else
302 *createFunc = (RF_VoidFuncPtr)rf_CreateDegradedWriteDAG;
303 }
304 }
305 }
306